linux/arch/powerpc/kernel/vecemu.c
<<
>>
Prefs
   1/*
   2 * Routines to emulate some Altivec/VMX instructions, specifically
   3 * those that can trap when given denormalized operands in Java mode.
   4 */
   5#include <linux/kernel.h>
   6#include <linux/errno.h>
   7#include <linux/sched.h>
   8#include <asm/ptrace.h>
   9#include <asm/processor.h>
  10#include <asm/uaccess.h>
  11
  12/* Functions in vector.S */
  13extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
  14extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
  15extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
  16extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
  17extern void vrefp(vector128 *dst, vector128 *src);
  18extern void vrsqrtefp(vector128 *dst, vector128 *src);
  19extern void vexptep(vector128 *dst, vector128 *src);
  20
  21static unsigned int exp2s[8] = {
  22        0x800000,
  23        0x8b95c2,
  24        0x9837f0,
  25        0xa5fed7,
  26        0xb504f3,
  27        0xc5672a,
  28        0xd744fd,
  29        0xeac0c7
  30};
  31
  32/*
  33 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
  34 * single-precision floating-point representation of x.
  35 */
  36static unsigned int eexp2(unsigned int s)
  37{
  38        int exp, pwr;
  39        unsigned int mant, frac;
  40
  41        /* extract exponent field from input */
  42        exp = ((s >> 23) & 0xff) - 127;
  43        if (exp > 7) {
  44                /* check for NaN input */
  45                if (exp == 128 && (s & 0x7fffff) != 0)
  46                        return s | 0x400000;    /* return QNaN */
  47                /* 2^-big = 0, 2^+big = +Inf */
  48                return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */
  49        }
  50        if (exp < -23)
  51                return 0x3f800000;      /* 1.0 */
  52
  53        /* convert to fixed point integer in 9.23 representation */
  54        pwr = (s & 0x7fffff) | 0x800000;
  55        if (exp > 0)
  56                pwr <<= exp;
  57        else
  58                pwr >>= -exp;
  59        if (s & 0x80000000)
  60                pwr = -pwr;
  61
  62        /* extract integer part, which becomes exponent part of result */
  63        exp = (pwr >> 23) + 126;
  64        if (exp >= 254)
  65                return 0x7f800000;
  66        if (exp < -23)
  67                return 0;
  68
  69        /* table lookup on top 3 bits of fraction to get mantissa */
  70        mant = exp2s[(pwr >> 20) & 7];
  71
  72        /* linear interpolation using remaining 20 bits of fraction */
  73        asm("mulhwu %0,%1,%2" : "=r" (frac)
  74            : "r" (pwr << 12), "r" (0x172b83ff));
  75        asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
  76        mant += frac;
  77
  78        if (exp >= 0)
  79                return mant + (exp << 23);
  80
  81        /* denormalized result */
  82        exp = -exp;
  83        mant += 1 << (exp - 1);
  84        return mant >> exp;
  85}
  86
  87/*
  88 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
  89 * single-precision floating-point representation of x.
  90 */
  91static unsigned int elog2(unsigned int s)
  92{
  93        int exp, mant, lz, frac;
  94
  95        exp = s & 0x7f800000;
  96        mant = s & 0x7fffff;
  97        if (exp == 0x7f800000) {        /* Inf or NaN */
  98                if (mant != 0)
  99                        s |= 0x400000;  /* turn NaN into QNaN */
 100                return s;
 101        }
 102        if ((exp | mant) == 0)          /* +0 or -0 */
 103                return 0xff800000;      /* return -Inf */
 104
 105        if (exp == 0) {
 106                /* denormalized */
 107                asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
 108                mant <<= lz - 8;
 109                exp = (-118 - lz) << 23;
 110        } else {
 111                mant |= 0x800000;
 112                exp -= 127 << 23;
 113        }
 114
 115        if (mant >= 0xb504f3) {                         /* 2^0.5 * 2^23 */
 116                exp |= 0x400000;                        /* 0.5 * 2^23 */
 117                asm("mulhwu %0,%1,%2" : "=r" (mant)
 118                    : "r" (mant), "r" (0xb504f334));    /* 2^-0.5 * 2^32 */
 119        }
 120        if (mant >= 0x9837f0) {                         /* 2^0.25 * 2^23 */
 121                exp |= 0x200000;                        /* 0.25 * 2^23 */
 122                asm("mulhwu %0,%1,%2" : "=r" (mant)
 123                    : "r" (mant), "r" (0xd744fccb));    /* 2^-0.25 * 2^32 */
 124        }
 125        if (mant >= 0x8b95c2) {                         /* 2^0.125 * 2^23 */
 126                exp |= 0x100000;                        /* 0.125 * 2^23 */
 127                asm("mulhwu %0,%1,%2" : "=r" (mant)
 128                    : "r" (mant), "r" (0xeac0c6e8));    /* 2^-0.125 * 2^32 */
 129        }
 130        if (mant > 0x800000) {                          /* 1.0 * 2^23 */
 131                /* calculate (mant - 1) * 1.381097463 */
 132                /* 1.381097463 == 0.125 / (2^0.125 - 1) */
 133                asm("mulhwu %0,%1,%2" : "=r" (frac)
 134                    : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
 135                exp += frac;
 136        }
 137        s = exp & 0x80000000;
 138        if (exp != 0) {
 139                if (s)
 140                        exp = -exp;
 141                asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
 142                lz = 8 - lz;
 143                if (lz > 0)
 144                        exp >>= lz;
 145                else if (lz < 0)
 146                        exp <<= -lz;
 147                s += ((lz + 126) << 23) + exp;
 148        }
 149        return s;
 150}
 151
 152#define VSCR_SAT        1
 153
 154static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
 155{
 156        int exp, mant;
 157
 158        exp = (x >> 23) & 0xff;
 159        mant = x & 0x7fffff;
 160        if (exp == 255 && mant != 0)
 161                return 0;               /* NaN -> 0 */
 162        exp = exp - 127 + scale;
 163        if (exp < 0)
 164                return 0;               /* round towards zero */
 165        if (exp >= 31) {
 166                /* saturate, unless the result would be -2^31 */
 167                if (x + (scale << 23) != 0xcf000000)
 168                        *vscrp |= VSCR_SAT;
 169                return (x & 0x80000000)? 0x80000000: 0x7fffffff;
 170        }
 171        mant |= 0x800000;
 172        mant = (mant << 7) >> (30 - exp);
 173        return (x & 0x80000000)? -mant: mant;
 174}
 175
 176static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
 177{
 178        int exp;
 179        unsigned int mant;
 180
 181        exp = (x >> 23) & 0xff;
 182        mant = x & 0x7fffff;
 183        if (exp == 255 && mant != 0)
 184                return 0;               /* NaN -> 0 */
 185        exp = exp - 127 + scale;
 186        if (exp < 0)
 187                return 0;               /* round towards zero */
 188        if (x & 0x80000000) {
 189                /* negative => saturate to 0 */
 190                *vscrp |= VSCR_SAT;
 191                return 0;
 192        }
 193        if (exp >= 32) {
 194                /* saturate */
 195                *vscrp |= VSCR_SAT;
 196                return 0xffffffff;
 197        }
 198        mant |= 0x800000;
 199        mant = (mant << 8) >> (31 - exp);
 200        return mant;
 201}
 202
 203/* Round to floating integer, towards 0 */
 204static unsigned int rfiz(unsigned int x)
 205{
 206        int exp;
 207
 208        exp = ((x >> 23) & 0xff) - 127;
 209        if (exp == 128 && (x & 0x7fffff) != 0)
 210                return x | 0x400000;    /* NaN -> make it a QNaN */
 211        if (exp >= 23)
 212                return x;               /* it's an integer already (or Inf) */
 213        if (exp < 0)
 214                return x & 0x80000000;  /* |x| < 1.0 rounds to 0 */
 215        return x & ~(0x7fffff >> exp);
 216}
 217
 218/* Round to floating integer, towards +/- Inf */
 219static unsigned int rfii(unsigned int x)
 220{
 221        int exp, mask;
 222
 223        exp = ((x >> 23) & 0xff) - 127;
 224        if (exp == 128 && (x & 0x7fffff) != 0)
 225                return x | 0x400000;    /* NaN -> make it a QNaN */
 226        if (exp >= 23)
 227                return x;               /* it's an integer already (or Inf) */
 228        if ((x & 0x7fffffff) == 0)
 229                return x;               /* +/-0 -> +/-0 */
 230        if (exp < 0)
 231                /* 0 < |x| < 1.0 rounds to +/- 1.0 */
 232                return (x & 0x80000000) | 0x3f800000;
 233        mask = 0x7fffff >> exp;
 234        /* mantissa overflows into exponent - that's OK,
 235           it can't overflow into the sign bit */
 236        return (x + mask) & ~mask;
 237}
 238
 239/* Round to floating integer, to nearest */
 240static unsigned int rfin(unsigned int x)
 241{
 242        int exp, half;
 243
 244        exp = ((x >> 23) & 0xff) - 127;
 245        if (exp == 128 && (x & 0x7fffff) != 0)
 246                return x | 0x400000;    /* NaN -> make it a QNaN */
 247        if (exp >= 23)
 248                return x;               /* it's an integer already (or Inf) */
 249        if (exp < -1)
 250                return x & 0x80000000;  /* |x| < 0.5 -> +/-0 */
 251        if (exp == -1)
 252                /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
 253                return (x & 0x80000000) | 0x3f800000;
 254        half = 0x400000 >> exp;
 255        /* add 0.5 to the magnitude and chop off the fraction bits */
 256        return (x + half) & ~(0x7fffff >> exp);
 257}
 258
 259int emulate_altivec(struct pt_regs *regs)
 260{
 261        unsigned int instr, i;
 262        unsigned int va, vb, vc, vd;
 263        vector128 *vrs;
 264
 265        if (get_user(instr, (unsigned int __user *) regs->nip))
 266                return -EFAULT;
 267        if ((instr >> 26) != 4)
 268                return -EINVAL;         /* not an altivec instruction */
 269        vd = (instr >> 21) & 0x1f;
 270        va = (instr >> 16) & 0x1f;
 271        vb = (instr >> 11) & 0x1f;
 272        vc = (instr >> 6) & 0x1f;
 273
 274        vrs = current->thread.vr;
 275        switch (instr & 0x3f) {
 276        case 10:
 277                switch (vc) {
 278                case 0: /* vaddfp */
 279                        vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
 280                        break;
 281                case 1: /* vsubfp */
 282                        vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
 283                        break;
 284                case 4: /* vrefp */
 285                        vrefp(&vrs[vd], &vrs[vb]);
 286                        break;
 287                case 5: /* vrsqrtefp */
 288                        vrsqrtefp(&vrs[vd], &vrs[vb]);
 289                        break;
 290                case 6: /* vexptefp */
 291                        for (i = 0; i < 4; ++i)
 292                                vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
 293                        break;
 294                case 7: /* vlogefp */
 295                        for (i = 0; i < 4; ++i)
 296                                vrs[vd].u[i] = elog2(vrs[vb].u[i]);
 297                        break;
 298                case 8:         /* vrfin */
 299                        for (i = 0; i < 4; ++i)
 300                                vrs[vd].u[i] = rfin(vrs[vb].u[i]);
 301                        break;
 302                case 9:         /* vrfiz */
 303                        for (i = 0; i < 4; ++i)
 304                                vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
 305                        break;
 306                case 10:        /* vrfip */
 307                        for (i = 0; i < 4; ++i) {
 308                                u32 x = vrs[vb].u[i];
 309                                x = (x & 0x80000000)? rfiz(x): rfii(x);
 310                                vrs[vd].u[i] = x;
 311                        }
 312                        break;
 313                case 11:        /* vrfim */
 314                        for (i = 0; i < 4; ++i) {
 315                                u32 x = vrs[vb].u[i];
 316                                x = (x & 0x80000000)? rfii(x): rfiz(x);
 317                                vrs[vd].u[i] = x;
 318                        }
 319                        break;
 320                case 14:        /* vctuxs */
 321                        for (i = 0; i < 4; ++i)
 322                                vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
 323                                                &current->thread.vscr.u[3]);
 324                        break;
 325                case 15:        /* vctsxs */
 326                        for (i = 0; i < 4; ++i)
 327                                vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
 328                                                &current->thread.vscr.u[3]);
 329                        break;
 330                default:
 331                        return -EINVAL;
 332                }
 333                break;
 334        case 46:        /* vmaddfp */
 335                vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
 336                break;
 337        case 47:        /* vnmsubfp */
 338                vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
 339                break;
 340        default:
 341                return -EINVAL;
 342        }
 343
 344        return 0;
 345}
 346