linux/arch/mips/math-emu/sp_maddf.c
<<
>>
Prefs
   1/*
   2 * IEEE754 floating point arithmetic
   3 * single precision: MADDF.f (Fused Multiply Add)
   4 * MADDF.fmt: FPR[fd] = FPR[fd] + (FPR[fs] x FPR[ft])
   5 *
   6 * MIPS floating point support
   7 * Copyright (C) 2015 Imagination Technologies, Ltd.
   8 * Author: Markos Chandras <markos.chandras@imgtec.com>
   9 *
  10 *  This program is free software; you can distribute it and/or modify it
  11 *  under the terms of the GNU General Public License as published by the
  12 *  Free Software Foundation; version 2 of the License.
  13 */
  14
  15#include "ieee754sp.h"
  16
  17enum maddf_flags {
  18        maddf_negate_product    = 1 << 0,
  19};
  20
  21static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
  22                                 union ieee754sp y, enum maddf_flags flags)
  23{
  24        int re;
  25        int rs;
  26        unsigned rm;
  27        unsigned short lxm;
  28        unsigned short hxm;
  29        unsigned short lym;
  30        unsigned short hym;
  31        unsigned lrm;
  32        unsigned hrm;
  33        unsigned t;
  34        unsigned at;
  35        int s;
  36
  37        COMPXSP;
  38        COMPYSP;
  39        COMPZSP;
  40
  41        EXPLODEXSP;
  42        EXPLODEYSP;
  43        EXPLODEZSP;
  44
  45        FLUSHXSP;
  46        FLUSHYSP;
  47        FLUSHZSP;
  48
  49        ieee754_clearcx();
  50
  51        switch (zc) {
  52        case IEEE754_CLASS_SNAN:
  53                ieee754_setcx(IEEE754_INVALID_OPERATION);
  54                return ieee754sp_nanxcpt(z);
  55        case IEEE754_CLASS_DNORM:
  56                SPDNORMZ;
  57        /* QNAN is handled separately below */
  58        }
  59
  60        switch (CLPAIR(xc, yc)) {
  61        case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
  62        case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
  63        case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
  64        case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
  65        case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
  66                return ieee754sp_nanxcpt(y);
  67
  68        case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
  69        case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
  70        case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
  71        case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
  72        case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
  73        case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
  74                return ieee754sp_nanxcpt(x);
  75
  76        case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
  77        case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
  78        case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
  79        case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
  80                return y;
  81
  82        case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
  83        case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
  84        case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
  85        case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
  86        case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
  87                return x;
  88
  89        /*
  90         * Infinity handling
  91         */
  92        case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
  93        case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
  94                if (zc == IEEE754_CLASS_QNAN)
  95                        return z;
  96                ieee754_setcx(IEEE754_INVALID_OPERATION);
  97                return ieee754sp_indef();
  98
  99        case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
 100        case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
 101        case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
 102        case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
 103        case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
 104                if (zc == IEEE754_CLASS_QNAN)
 105                        return z;
 106                return ieee754sp_inf(xs ^ ys);
 107
 108        case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
 109        case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
 110        case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
 111        case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
 112        case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
 113                if (zc == IEEE754_CLASS_INF)
 114                        return ieee754sp_inf(zs);
 115                /* Multiplication is 0 so just return z */
 116                return z;
 117
 118        case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
 119                SPDNORMX;
 120
 121        case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
 122                if (zc == IEEE754_CLASS_QNAN)
 123                        return z;
 124                else if (zc == IEEE754_CLASS_INF)
 125                        return ieee754sp_inf(zs);
 126                SPDNORMY;
 127                break;
 128
 129        case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
 130                if (zc == IEEE754_CLASS_QNAN)
 131                        return z;
 132                else if (zc == IEEE754_CLASS_INF)
 133                        return ieee754sp_inf(zs);
 134                SPDNORMX;
 135                break;
 136
 137        case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
 138                if (zc == IEEE754_CLASS_QNAN)
 139                        return z;
 140                else if (zc == IEEE754_CLASS_INF)
 141                        return ieee754sp_inf(zs);
 142                /* fall through to real computations */
 143        }
 144
 145        /* Finally get to do some computation */
 146
 147        /*
 148         * Do the multiplication bit first
 149         *
 150         * rm = xm * ym, re = xe + ye basically
 151         *
 152         * At this point xm and ym should have been normalized.
 153         */
 154
 155        /* rm = xm * ym, re = xe+ye basically */
 156        assert(xm & SP_HIDDEN_BIT);
 157        assert(ym & SP_HIDDEN_BIT);
 158
 159        re = xe + ye;
 160        rs = xs ^ ys;
 161        if (flags & maddf_negate_product)
 162                rs ^= 1;
 163
 164        /* shunt to top of word */
 165        xm <<= 32 - (SP_FBITS + 1);
 166        ym <<= 32 - (SP_FBITS + 1);
 167
 168        /*
 169         * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
 170         */
 171        lxm = xm & 0xffff;
 172        hxm = xm >> 16;
 173        lym = ym & 0xffff;
 174        hym = ym >> 16;
 175
 176        lrm = lxm * lym;        /* 16 * 16 => 32 */
 177        hrm = hxm * hym;        /* 16 * 16 => 32 */
 178
 179        t = lxm * hym; /* 16 * 16 => 32 */
 180        at = lrm + (t << 16);
 181        hrm += at < lrm;
 182        lrm = at;
 183        hrm = hrm + (t >> 16);
 184
 185        t = hxm * lym; /* 16 * 16 => 32 */
 186        at = lrm + (t << 16);
 187        hrm += at < lrm;
 188        lrm = at;
 189        hrm = hrm + (t >> 16);
 190
 191        rm = hrm | (lrm != 0);
 192
 193        /*
 194         * Sticky shift down to normal rounding precision.
 195         */
 196        if ((int) rm < 0) {
 197                rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
 198                    ((rm << (SP_FBITS + 1 + 3)) != 0);
 199                re++;
 200        } else {
 201                rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
 202                     ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
 203        }
 204        assert(rm & (SP_HIDDEN_BIT << 3));
 205
 206        /* And now the addition */
 207
 208        assert(zm & SP_HIDDEN_BIT);
 209
 210        /*
 211         * Provide guard,round and stick bit space.
 212         */
 213        zm <<= 3;
 214
 215        if (ze > re) {
 216                /*
 217                 * Have to shift r fraction right to align.
 218                 */
 219                s = ze - re;
 220                rm = XSPSRS(rm, s);
 221                re += s;
 222        } else if (re > ze) {
 223                /*
 224                 * Have to shift z fraction right to align.
 225                 */
 226                s = re - ze;
 227                zm = XSPSRS(zm, s);
 228                ze += s;
 229        }
 230        assert(ze == re);
 231        assert(ze <= SP_EMAX);
 232
 233        if (zs == rs) {
 234                /*
 235                 * Generate 28 bit result of adding two 27 bit numbers
 236                 * leaving result in zm, zs and ze.
 237                 */
 238                zm = zm + rm;
 239
 240                if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
 241                        zm = XSPSRS1(zm);
 242                        ze++;
 243                }
 244        } else {
 245                if (zm >= rm) {
 246                        zm = zm - rm;
 247                } else {
 248                        zm = rm - zm;
 249                        zs = rs;
 250                }
 251                if (zm == 0)
 252                        return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
 253
 254                /*
 255                 * Normalize in extended single precision
 256                 */
 257                while ((zm >> (SP_MBITS + 3)) == 0) {
 258                        zm <<= 1;
 259                        ze--;
 260                }
 261
 262        }
 263        return ieee754sp_format(zs, ze, zm);
 264}
 265
 266union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
 267                                union ieee754sp y)
 268{
 269        return _sp_maddf(z, x, y, 0);
 270}
 271
 272union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
 273                                union ieee754sp y)
 274{
 275        return _sp_maddf(z, x, y, maddf_negate_product);
 276}
 277