linux/arch/mips/math-emu/sp_maddf.c
<<
>>
Prefs
   1/*
   2 * IEEE754 floating point arithmetic
   3 * single precision: MADDF.f (Fused Multiply Add)
   4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
   5 *
   6 * MIPS floating point support
   7 * Copyright (C) 2015 Imagination Technologies, Ltd.
   8 * Author: Markos Chandras <markos.chandras@imgtec.com>
   9 *
  10 *  This program is free software; you can distribute it and/or modify it
  11 *  under the terms of the GNU General Public License as published by the
  12 *  Free Software Foundation; version 2 of the License.
  13 */
  14
  15#include "ieee754sp.h"
  16
  17
  18static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
  19                                 union ieee754sp y, enum maddf_flags flags)
  20{
  21        int re;
  22        int rs;
  23        unsigned int rm;
  24        u64 rm64;
  25        u64 zm64;
  26        int s;
  27
  28        COMPXSP;
  29        COMPYSP;
  30        COMPZSP;
  31
  32        EXPLODEXSP;
  33        EXPLODEYSP;
  34        EXPLODEZSP;
  35
  36        FLUSHXSP;
  37        FLUSHYSP;
  38        FLUSHZSP;
  39
  40        ieee754_clearcx();
  41
  42        /*
  43         * Handle the cases when at least one of x, y or z is a NaN.
  44         * Order of precedence is sNaN, qNaN and z, x, y.
  45         */
  46        if (zc == IEEE754_CLASS_SNAN)
  47                return ieee754sp_nanxcpt(z);
  48        if (xc == IEEE754_CLASS_SNAN)
  49                return ieee754sp_nanxcpt(x);
  50        if (yc == IEEE754_CLASS_SNAN)
  51                return ieee754sp_nanxcpt(y);
  52        if (zc == IEEE754_CLASS_QNAN)
  53                return z;
  54        if (xc == IEEE754_CLASS_QNAN)
  55                return x;
  56        if (yc == IEEE754_CLASS_QNAN)
  57                return y;
  58
  59        if (zc == IEEE754_CLASS_DNORM)
  60                SPDNORMZ;
  61        /* ZERO z cases are handled separately below */
  62
  63        switch (CLPAIR(xc, yc)) {
  64
  65
  66        /*
  67         * Infinity handling
  68         */
  69        case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
  70        case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
  71                ieee754_setcx(IEEE754_INVALID_OPERATION);
  72                return ieee754sp_indef();
  73
  74        case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
  75        case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
  76        case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
  77        case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
  78        case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
  79                if ((zc == IEEE754_CLASS_INF) &&
  80                    ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
  81                     ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
  82                        /*
  83                         * Cases of addition of infinities with opposite signs
  84                         * or subtraction of infinities with same signs.
  85                         */
  86                        ieee754_setcx(IEEE754_INVALID_OPERATION);
  87                        return ieee754sp_indef();
  88                }
  89                /*
  90                 * z is here either not an infinity, or an infinity having the
  91                 * same sign as product (x*y) (in case of MADDF.D instruction)
  92                 * or product -(x*y) (in MSUBF.D case). The result must be an
  93                 * infinity, and its sign is determined only by the value of
  94                 * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
  95                 */
  96                if (flags & MADDF_NEGATE_PRODUCT)
  97                        return ieee754sp_inf(1 ^ (xs ^ ys));
  98                else
  99                        return ieee754sp_inf(xs ^ ys);
 100
 101        case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
 102        case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
 103        case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
 104        case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
 105        case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
 106                if (zc == IEEE754_CLASS_INF)
 107                        return ieee754sp_inf(zs);
 108                if (zc == IEEE754_CLASS_ZERO) {
 109                        /* Handle cases +0 + (-0) and similar ones. */
 110                        if ((!(flags & MADDF_NEGATE_PRODUCT)
 111                                        && (zs == (xs ^ ys))) ||
 112                            ((flags & MADDF_NEGATE_PRODUCT)
 113                                        && (zs != (xs ^ ys))))
 114                                /*
 115                                 * Cases of addition of zeros of equal signs
 116                                 * or subtraction of zeroes of opposite signs.
 117                                 * The sign of the resulting zero is in any
 118                                 * such case determined only by the sign of z.
 119                                 */
 120                                return z;
 121
 122                        return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
 123                }
 124                /* x*y is here 0, and z is not 0, so just return z */
 125                return z;
 126
 127        case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 128                SPDNORMX;
 129                /* fall through */
 130
 131        case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
 132                if (zc == IEEE754_CLASS_INF)
 133                        return ieee754sp_inf(zs);
 134                SPDNORMY;
 135                break;
 136
 137        case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
 138                if (zc == IEEE754_CLASS_INF)
 139                        return ieee754sp_inf(zs);
 140                SPDNORMX;
 141                break;
 142
 143        case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
 144                if (zc == IEEE754_CLASS_INF)
 145                        return ieee754sp_inf(zs);
 146                /* continue to real computations */
 147        }
 148
 149        /* Finally get to do some computation */
 150
 151        /*
 152         * Do the multiplication bit first
 153         *
 154         * rm = xm * ym, re = xe + ye basically
 155         *
 156         * At this point xm and ym should have been normalized.
 157         */
 158
 159        /* rm = xm * ym, re = xe+ye basically */
 160        assert(xm & SP_HIDDEN_BIT);
 161        assert(ym & SP_HIDDEN_BIT);
 162
 163        re = xe + ye;
 164        rs = xs ^ ys;
 165        if (flags & MADDF_NEGATE_PRODUCT)
 166                rs ^= 1;
 167
 168        /* Multiple 24 bit xm and ym to give 48 bit results */
 169        rm64 = (uint64_t)xm * ym;
 170
 171        /* Shunt to top of word */
 172        rm64 = rm64 << 16;
 173
 174        /* Put explicit bit at bit 62 if necessary */
 175        if ((int64_t) rm64 < 0) {
 176                rm64 = rm64 >> 1;
 177                re++;
 178        }
 179
 180        assert(rm64 & (1 << 62));
 181
 182        if (zc == IEEE754_CLASS_ZERO) {
 183                /*
 184                 * Move explicit bit from bit 62 to bit 26 since the
 185                 * ieee754sp_format code expects the mantissa to be
 186                 * 27 bits wide (24 + 3 rounding bits).
 187                 */
 188                rm = XSPSRS64(rm64, (62 - 26));
 189                return ieee754sp_format(rs, re, rm);
 190        }
 191
 192        /* Move explicit bit from bit 23 to bit 62 */
 193        zm64 = (uint64_t)zm << (62 - 23);
 194        assert(zm64 & (1 << 62));
 195
 196        /* Make the exponents the same */
 197        if (ze > re) {
 198                /*
 199                 * Have to shift r fraction right to align.
 200                 */
 201                s = ze - re;
 202                rm64 = XSPSRS64(rm64, s);
 203                re += s;
 204        } else if (re > ze) {
 205                /*
 206                 * Have to shift z fraction right to align.
 207                 */
 208                s = re - ze;
 209                zm64 = XSPSRS64(zm64, s);
 210                ze += s;
 211        }
 212        assert(ze == re);
 213        assert(ze <= SP_EMAX);
 214
 215        /* Do the addition */
 216        if (zs == rs) {
 217                /*
 218                 * Generate 64 bit result by adding two 63 bit numbers
 219                 * leaving result in zm64, zs and ze.
 220                 */
 221                zm64 = zm64 + rm64;
 222                if ((int64_t)zm64 < 0) {        /* carry out */
 223                        zm64 = XSPSRS1(zm64);
 224                        ze++;
 225                }
 226        } else {
 227                if (zm64 >= rm64) {
 228                        zm64 = zm64 - rm64;
 229                } else {
 230                        zm64 = rm64 - zm64;
 231                        zs = rs;
 232                }
 233                if (zm64 == 0)
 234                        return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
 235
 236                /*
 237                 * Put explicit bit at bit 62 if necessary.
 238                 */
 239                while ((zm64 >> 62) == 0) {
 240                        zm64 <<= 1;
 241                        ze--;
 242                }
 243        }
 244
 245        /*
 246         * Move explicit bit from bit 62 to bit 26 since the
 247         * ieee754sp_format code expects the mantissa to be
 248         * 27 bits wide (24 + 3 rounding bits).
 249         */
 250        zm = XSPSRS64(zm64, (62 - 26));
 251
 252        return ieee754sp_format(zs, ze, zm);
 253}
 254
 255union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
 256                                union ieee754sp y)
 257{
 258        return _sp_maddf(z, x, y, 0);
 259}
 260
 261union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
 262                                union ieee754sp y)
 263{
 264        return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
 265}
 266