linux/arch/powerpc/kernel/vecemu.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Routines to emulate some Altivec/VMX instructions, specifically
   4 * those that can trap when given denormalized operands in Java mode.
   5 */
   6#include <linux/kernel.h>
   7#include <linux/errno.h>
   8#include <linux/sched.h>
   9#include <asm/ptrace.h>
  10#include <asm/processor.h>
  11#include <asm/switch_to.h>
  12#include <linux/uaccess.h>
  13
  14/* Functions in vector.S */
  15extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
  16extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
  17extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
  18extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
  19extern void vrefp(vector128 *dst, vector128 *src);
  20extern void vrsqrtefp(vector128 *dst, vector128 *src);
  21extern void vexptep(vector128 *dst, vector128 *src);
  22
  23static unsigned int exp2s[8] = {
  24        0x800000,
  25        0x8b95c2,
  26        0x9837f0,
  27        0xa5fed7,
  28        0xb504f3,
  29        0xc5672a,
  30        0xd744fd,
  31        0xeac0c7
  32};
  33
  34/*
  35 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
  36 * single-precision floating-point representation of x.
  37 */
  38static unsigned int eexp2(unsigned int s)
  39{
  40        int exp, pwr;
  41        unsigned int mant, frac;
  42
  43        /* extract exponent field from input */
  44        exp = ((s >> 23) & 0xff) - 127;
  45        if (exp > 7) {
  46                /* check for NaN input */
  47                if (exp == 128 && (s & 0x7fffff) != 0)
  48                        return s | 0x400000;    /* return QNaN */
  49                /* 2^-big = 0, 2^+big = +Inf */
  50                return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */
  51        }
  52        if (exp < -23)
  53                return 0x3f800000;      /* 1.0 */
  54
  55        /* convert to fixed point integer in 9.23 representation */
  56        pwr = (s & 0x7fffff) | 0x800000;
  57        if (exp > 0)
  58                pwr <<= exp;
  59        else
  60                pwr >>= -exp;
  61        if (s & 0x80000000)
  62                pwr = -pwr;
  63
  64        /* extract integer part, which becomes exponent part of result */
  65        exp = (pwr >> 23) + 126;
  66        if (exp >= 254)
  67                return 0x7f800000;
  68        if (exp < -23)
  69                return 0;
  70
  71        /* table lookup on top 3 bits of fraction to get mantissa */
  72        mant = exp2s[(pwr >> 20) & 7];
  73
  74        /* linear interpolation using remaining 20 bits of fraction */
  75        asm("mulhwu %0,%1,%2" : "=r" (frac)
  76            : "r" (pwr << 12), "r" (0x172b83ff));
  77        asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
  78        mant += frac;
  79
  80        if (exp >= 0)
  81                return mant + (exp << 23);
  82
  83        /* denormalized result */
  84        exp = -exp;
  85        mant += 1 << (exp - 1);
  86        return mant >> exp;
  87}
  88
  89/*
  90 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
  91 * single-precision floating-point representation of x.
  92 */
  93static unsigned int elog2(unsigned int s)
  94{
  95        int exp, mant, lz, frac;
  96
  97        exp = s & 0x7f800000;
  98        mant = s & 0x7fffff;
  99        if (exp == 0x7f800000) {        /* Inf or NaN */
 100                if (mant != 0)
 101                        s |= 0x400000;  /* turn NaN into QNaN */
 102                return s;
 103        }
 104        if ((exp | mant) == 0)          /* +0 or -0 */
 105                return 0xff800000;      /* return -Inf */
 106
 107        if (exp == 0) {
 108                /* denormalized */
 109                asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
 110                mant <<= lz - 8;
 111                exp = (-118 - lz) << 23;
 112        } else {
 113                mant |= 0x800000;
 114                exp -= 127 << 23;
 115        }
 116
 117        if (mant >= 0xb504f3) {                         /* 2^0.5 * 2^23 */
 118                exp |= 0x400000;                        /* 0.5 * 2^23 */
 119                asm("mulhwu %0,%1,%2" : "=r" (mant)
 120                    : "r" (mant), "r" (0xb504f334));    /* 2^-0.5 * 2^32 */
 121        }
 122        if (mant >= 0x9837f0) {                         /* 2^0.25 * 2^23 */
 123                exp |= 0x200000;                        /* 0.25 * 2^23 */
 124                asm("mulhwu %0,%1,%2" : "=r" (mant)
 125                    : "r" (mant), "r" (0xd744fccb));    /* 2^-0.25 * 2^32 */
 126        }
 127        if (mant >= 0x8b95c2) {                         /* 2^0.125 * 2^23 */
 128                exp |= 0x100000;                        /* 0.125 * 2^23 */
 129                asm("mulhwu %0,%1,%2" : "=r" (mant)
 130                    : "r" (mant), "r" (0xeac0c6e8));    /* 2^-0.125 * 2^32 */
 131        }
 132        if (mant > 0x800000) {                          /* 1.0 * 2^23 */
 133                /* calculate (mant - 1) * 1.381097463 */
 134                /* 1.381097463 == 0.125 / (2^0.125 - 1) */
 135                asm("mulhwu %0,%1,%2" : "=r" (frac)
 136                    : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
 137                exp += frac;
 138        }
 139        s = exp & 0x80000000;
 140        if (exp != 0) {
 141                if (s)
 142                        exp = -exp;
 143                asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
 144                lz = 8 - lz;
 145                if (lz > 0)
 146                        exp >>= lz;
 147                else if (lz < 0)
 148                        exp <<= -lz;
 149                s += ((lz + 126) << 23) + exp;
 150        }
 151        return s;
 152}
 153
 154#define VSCR_SAT        1
 155
 156static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
 157{
 158        int exp, mant;
 159
 160        exp = (x >> 23) & 0xff;
 161        mant = x & 0x7fffff;
 162        if (exp == 255 && mant != 0)
 163                return 0;               /* NaN -> 0 */
 164        exp = exp - 127 + scale;
 165        if (exp < 0)
 166                return 0;               /* round towards zero */
 167        if (exp >= 31) {
 168                /* saturate, unless the result would be -2^31 */
 169                if (x + (scale << 23) != 0xcf000000)
 170                        *vscrp |= VSCR_SAT;
 171                return (x & 0x80000000)? 0x80000000: 0x7fffffff;
 172        }
 173        mant |= 0x800000;
 174        mant = (mant << 7) >> (30 - exp);
 175        return (x & 0x80000000)? -mant: mant;
 176}
 177
 178static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
 179{
 180        int exp;
 181        unsigned int mant;
 182
 183        exp = (x >> 23) & 0xff;
 184        mant = x & 0x7fffff;
 185        if (exp == 255 && mant != 0)
 186                return 0;               /* NaN -> 0 */
 187        exp = exp - 127 + scale;
 188        if (exp < 0)
 189                return 0;               /* round towards zero */
 190        if (x & 0x80000000) {
 191                /* negative => saturate to 0 */
 192                *vscrp |= VSCR_SAT;
 193                return 0;
 194        }
 195        if (exp >= 32) {
 196                /* saturate */
 197                *vscrp |= VSCR_SAT;
 198                return 0xffffffff;
 199        }
 200        mant |= 0x800000;
 201        mant = (mant << 8) >> (31 - exp);
 202        return mant;
 203}
 204
 205/* Round to floating integer, towards 0 */
 206static unsigned int rfiz(unsigned int x)
 207{
 208        int exp;
 209
 210        exp = ((x >> 23) & 0xff) - 127;
 211        if (exp == 128 && (x & 0x7fffff) != 0)
 212                return x | 0x400000;    /* NaN -> make it a QNaN */
 213        if (exp >= 23)
 214                return x;               /* it's an integer already (or Inf) */
 215        if (exp < 0)
 216                return x & 0x80000000;  /* |x| < 1.0 rounds to 0 */
 217        return x & ~(0x7fffff >> exp);
 218}
 219
 220/* Round to floating integer, towards +/- Inf */
 221static unsigned int rfii(unsigned int x)
 222{
 223        int exp, mask;
 224
 225        exp = ((x >> 23) & 0xff) - 127;
 226        if (exp == 128 && (x & 0x7fffff) != 0)
 227                return x | 0x400000;    /* NaN -> make it a QNaN */
 228        if (exp >= 23)
 229                return x;               /* it's an integer already (or Inf) */
 230        if ((x & 0x7fffffff) == 0)
 231                return x;               /* +/-0 -> +/-0 */
 232        if (exp < 0)
 233                /* 0 < |x| < 1.0 rounds to +/- 1.0 */
 234                return (x & 0x80000000) | 0x3f800000;
 235        mask = 0x7fffff >> exp;
 236        /* mantissa overflows into exponent - that's OK,
 237           it can't overflow into the sign bit */
 238        return (x + mask) & ~mask;
 239}
 240
 241/* Round to floating integer, to nearest */
 242static unsigned int rfin(unsigned int x)
 243{
 244        int exp, half;
 245
 246        exp = ((x >> 23) & 0xff) - 127;
 247        if (exp == 128 && (x & 0x7fffff) != 0)
 248                return x | 0x400000;    /* NaN -> make it a QNaN */
 249        if (exp >= 23)
 250                return x;               /* it's an integer already (or Inf) */
 251        if (exp < -1)
 252                return x & 0x80000000;  /* |x| < 0.5 -> +/-0 */
 253        if (exp == -1)
 254                /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
 255                return (x & 0x80000000) | 0x3f800000;
 256        half = 0x400000 >> exp;
 257        /* add 0.5 to the magnitude and chop off the fraction bits */
 258        return (x + half) & ~(0x7fffff >> exp);
 259}
 260
 261int emulate_altivec(struct pt_regs *regs)
 262{
 263        unsigned int instr, i;
 264        unsigned int va, vb, vc, vd;
 265        vector128 *vrs;
 266
 267        if (get_user(instr, (unsigned int __user *) regs->nip))
 268                return -EFAULT;
 269        if ((instr >> 26) != 4)
 270                return -EINVAL;         /* not an altivec instruction */
 271        vd = (instr >> 21) & 0x1f;
 272        va = (instr >> 16) & 0x1f;
 273        vb = (instr >> 11) & 0x1f;
 274        vc = (instr >> 6) & 0x1f;
 275
 276        vrs = current->thread.vr_state.vr;
 277        switch (instr & 0x3f) {
 278        case 10:
 279                switch (vc) {
 280                case 0: /* vaddfp */
 281                        vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
 282                        break;
 283                case 1: /* vsubfp */
 284                        vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
 285                        break;
 286                case 4: /* vrefp */
 287                        vrefp(&vrs[vd], &vrs[vb]);
 288                        break;
 289                case 5: /* vrsqrtefp */
 290                        vrsqrtefp(&vrs[vd], &vrs[vb]);
 291                        break;
 292                case 6: /* vexptefp */
 293                        for (i = 0; i < 4; ++i)
 294                                vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
 295                        break;
 296                case 7: /* vlogefp */
 297                        for (i = 0; i < 4; ++i)
 298                                vrs[vd].u[i] = elog2(vrs[vb].u[i]);
 299                        break;
 300                case 8:         /* vrfin */
 301                        for (i = 0; i < 4; ++i)
 302                                vrs[vd].u[i] = rfin(vrs[vb].u[i]);
 303                        break;
 304                case 9:         /* vrfiz */
 305                        for (i = 0; i < 4; ++i)
 306                                vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
 307                        break;
 308                case 10:        /* vrfip */
 309                        for (i = 0; i < 4; ++i) {
 310                                u32 x = vrs[vb].u[i];
 311                                x = (x & 0x80000000)? rfiz(x): rfii(x);
 312                                vrs[vd].u[i] = x;
 313                        }
 314                        break;
 315                case 11:        /* vrfim */
 316                        for (i = 0; i < 4; ++i) {
 317                                u32 x = vrs[vb].u[i];
 318                                x = (x & 0x80000000)? rfii(x): rfiz(x);
 319                                vrs[vd].u[i] = x;
 320                        }
 321                        break;
 322                case 14:        /* vctuxs */
 323                        for (i = 0; i < 4; ++i)
 324                                vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
 325                                        &current->thread.vr_state.vscr.u[3]);
 326                        break;
 327                case 15:        /* vctsxs */
 328                        for (i = 0; i < 4; ++i)
 329                                vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
 330                                        &current->thread.vr_state.vscr.u[3]);
 331                        break;
 332                default:
 333                        return -EINVAL;
 334                }
 335                break;
 336        case 46:        /* vmaddfp */
 337                vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
 338                break;
 339        case 47:        /* vnmsubfp */
 340                vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
 341                break;
 342        default:
 343                return -EINVAL;
 344        }
 345
 346        return 0;
 347}
 348