linux/arch/mips/math-emu/sp_maddf.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * IEEE754 floating point arithmetic
   4 * single precision: MADDF.f (Fused Multiply Add)
   5 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
   6 *
   7 * MIPS floating point support
   8 * Copyright (C) 2015 Imagination Technologies, Ltd.
   9 * Author: Markos Chandras <markos.chandras@imgtec.com>
  10 */
  11
  12#include "ieee754sp.h"
  13
  14
  15static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
  16                                 union ieee754sp y, enum maddf_flags flags)
  17{
  18        int re;
  19        int rs;
  20        unsigned int rm;
  21        u64 rm64;
  22        u64 zm64;
  23        int s;
  24
  25        COMPXSP;
  26        COMPYSP;
  27        COMPZSP;
  28
  29        EXPLODEXSP;
  30        EXPLODEYSP;
  31        EXPLODEZSP;
  32
  33        FLUSHXSP;
  34        FLUSHYSP;
  35        FLUSHZSP;
  36
  37        ieee754_clearcx();
  38
  39        rs = xs ^ ys;
  40        if (flags & MADDF_NEGATE_PRODUCT)
  41                rs ^= 1;
  42        if (flags & MADDF_NEGATE_ADDITION)
  43                zs ^= 1;
  44
  45        /*
  46         * Handle the cases when at least one of x, y or z is a NaN.
  47         * Order of precedence is sNaN, qNaN and z, x, y.
  48         */
  49        if (zc == IEEE754_CLASS_SNAN)
  50                return ieee754sp_nanxcpt(z);
  51        if (xc == IEEE754_CLASS_SNAN)
  52                return ieee754sp_nanxcpt(x);
  53        if (yc == IEEE754_CLASS_SNAN)
  54                return ieee754sp_nanxcpt(y);
  55        if (zc == IEEE754_CLASS_QNAN)
  56                return z;
  57        if (xc == IEEE754_CLASS_QNAN)
  58                return x;
  59        if (yc == IEEE754_CLASS_QNAN)
  60                return y;
  61
  62        if (zc == IEEE754_CLASS_DNORM)
  63                SPDNORMZ;
  64        /* ZERO z cases are handled separately below */
  65
  66        switch (CLPAIR(xc, yc)) {
  67
  68
  69        /*
  70         * Infinity handling
  71         */
  72        case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
  73        case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
  74                ieee754_setcx(IEEE754_INVALID_OPERATION);
  75                return ieee754sp_indef();
  76
  77        case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
  78        case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
  79        case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
  80        case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
  81        case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
  82                if ((zc == IEEE754_CLASS_INF) && (zs != rs)) {
  83                        /*
  84                         * Cases of addition of infinities with opposite signs
  85                         * or subtraction of infinities with same signs.
  86                         */
  87                        ieee754_setcx(IEEE754_INVALID_OPERATION);
  88                        return ieee754sp_indef();
  89                }
  90                /*
  91                 * z is here either not an infinity, or an infinity having the
  92                 * same sign as product (x*y). The result must be an infinity,
  93                 * and its sign is determined only by the sign of product (x*y).
  94                 */
  95                return ieee754sp_inf(rs);
  96
  97        case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
  98        case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
  99        case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
 100        case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
 101        case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
 102                if (zc == IEEE754_CLASS_INF)
 103                        return ieee754sp_inf(zs);
 104                if (zc == IEEE754_CLASS_ZERO) {
 105                        /* Handle cases +0 + (-0) and similar ones. */
 106                        if (zs == rs)
 107                                /*
 108                                 * Cases of addition of zeros of equal signs
 109                                 * or subtraction of zeroes of opposite signs.
 110                                 * The sign of the resulting zero is in any
 111                                 * such case determined only by the sign of z.
 112                                 */
 113                                return z;
 114
 115                        return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
 116                }
 117                /* x*y is here 0, and z is not 0, so just return z */
 118                return z;
 119
 120        case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 121                SPDNORMX;
 122                fallthrough;
 123        case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
 124                if (zc == IEEE754_CLASS_INF)
 125                        return ieee754sp_inf(zs);
 126                SPDNORMY;
 127                break;
 128
 129        case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
 130                if (zc == IEEE754_CLASS_INF)
 131                        return ieee754sp_inf(zs);
 132                SPDNORMX;
 133                break;
 134
 135        case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
 136                if (zc == IEEE754_CLASS_INF)
 137                        return ieee754sp_inf(zs);
 138                /* continue to real computations */
 139        }
 140
 141        /* Finally get to do some computation */
 142
 143        /*
 144         * Do the multiplication bit first
 145         *
 146         * rm = xm * ym, re = xe + ye basically
 147         *
 148         * At this point xm and ym should have been normalized.
 149         */
 150
 151        /* rm = xm * ym, re = xe+ye basically */
 152        assert(xm & SP_HIDDEN_BIT);
 153        assert(ym & SP_HIDDEN_BIT);
 154
 155        re = xe + ye;
 156
 157        /* Multiple 24 bit xm and ym to give 48 bit results */
 158        rm64 = (uint64_t)xm * ym;
 159
 160        /* Shunt to top of word */
 161        rm64 = rm64 << 16;
 162
 163        /* Put explicit bit at bit 62 if necessary */
 164        if ((int64_t) rm64 < 0) {
 165                rm64 = rm64 >> 1;
 166                re++;
 167        }
 168
 169        assert(rm64 & (1 << 62));
 170
 171        if (zc == IEEE754_CLASS_ZERO) {
 172                /*
 173                 * Move explicit bit from bit 62 to bit 26 since the
 174                 * ieee754sp_format code expects the mantissa to be
 175                 * 27 bits wide (24 + 3 rounding bits).
 176                 */
 177                rm = XSPSRS64(rm64, (62 - 26));
 178                return ieee754sp_format(rs, re, rm);
 179        }
 180
 181        /* Move explicit bit from bit 23 to bit 62 */
 182        zm64 = (uint64_t)zm << (62 - 23);
 183        assert(zm64 & (1 << 62));
 184
 185        /* Make the exponents the same */
 186        if (ze > re) {
 187                /*
 188                 * Have to shift r fraction right to align.
 189                 */
 190                s = ze - re;
 191                rm64 = XSPSRS64(rm64, s);
 192                re += s;
 193        } else if (re > ze) {
 194                /*
 195                 * Have to shift z fraction right to align.
 196                 */
 197                s = re - ze;
 198                zm64 = XSPSRS64(zm64, s);
 199                ze += s;
 200        }
 201        assert(ze == re);
 202        assert(ze <= SP_EMAX);
 203
 204        /* Do the addition */
 205        if (zs == rs) {
 206                /*
 207                 * Generate 64 bit result by adding two 63 bit numbers
 208                 * leaving result in zm64, zs and ze.
 209                 */
 210                zm64 = zm64 + rm64;
 211                if ((int64_t)zm64 < 0) {        /* carry out */
 212                        zm64 = XSPSRS1(zm64);
 213                        ze++;
 214                }
 215        } else {
 216                if (zm64 >= rm64) {
 217                        zm64 = zm64 - rm64;
 218                } else {
 219                        zm64 = rm64 - zm64;
 220                        zs = rs;
 221                }
 222                if (zm64 == 0)
 223                        return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
 224
 225                /*
 226                 * Put explicit bit at bit 62 if necessary.
 227                 */
 228                while ((zm64 >> 62) == 0) {
 229                        zm64 <<= 1;
 230                        ze--;
 231                }
 232        }
 233
 234        /*
 235         * Move explicit bit from bit 62 to bit 26 since the
 236         * ieee754sp_format code expects the mantissa to be
 237         * 27 bits wide (24 + 3 rounding bits).
 238         */
 239        zm = XSPSRS64(zm64, (62 - 26));
 240
 241        return ieee754sp_format(zs, ze, zm);
 242}
 243
 244union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
 245                                union ieee754sp y)
 246{
 247        return _sp_maddf(z, x, y, 0);
 248}
 249
 250union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
 251                                union ieee754sp y)
 252{
 253        return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
 254}
 255
 256union ieee754sp ieee754sp_madd(union ieee754sp z, union ieee754sp x,
 257                                union ieee754sp y)
 258{
 259        return _sp_maddf(z, x, y, 0);
 260}
 261
 262union ieee754sp ieee754sp_msub(union ieee754sp z, union ieee754sp x,
 263                                union ieee754sp y)
 264{
 265        return _sp_maddf(z, x, y, MADDF_NEGATE_ADDITION);
 266}
 267
 268union ieee754sp ieee754sp_nmadd(union ieee754sp z, union ieee754sp x,
 269                                union ieee754sp y)
 270{
 271        return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT|MADDF_NEGATE_ADDITION);
 272}
 273
 274union ieee754sp ieee754sp_nmsub(union ieee754sp z, union ieee754sp x,
 275                                union ieee754sp y)
 276{
 277        return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
 278}
 279