LXR linux/arch/powerpc/kernel/vecemu.c

   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Routines to emulate some Altivec/VMX instructions, specifically
   4 * those that can trap when given denormalized operands in Java mode.
   5 */
   6#include <linux/kernel.h>
   7#include <linux/errno.h>
   8#include <linux/sched.h>
   9#include <asm/ptrace.h>
  10#include <asm/processor.h>
  11#include <linux/uaccess.h>
  12
/*
 * Functions in vector.S
 *
 * Every routine takes pointers to whole 128-bit vector registers: the
 * destination comes first, followed by the source operand(s).
 * emulate_altivec() below calls them with entries of the current
 * thread's saved vector register array.
 */
extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
extern void vrefp(vector128 *dst, vector128 *src);
extern void vrsqrtefp(vector128 *dst, vector128 *src);
/*
 * NOTE(review): vexptep is not referenced anywhere in this file; the
 * vexptefp instruction is emulated in C by eexp2().  This looks like a
 * stale declaration -- confirm against vector.S before removing it.
 */
extern void vexptep(vector128 *dst, vector128 *src);
  21
  22static unsigned int exp2s[8] = {
  23        0x800000,
  24        0x8b95c2,
  25        0x9837f0,
  26        0xa5fed7,
  27        0xb504f3,
  28        0xc5672a,
  29        0xd744fd,
  30        0xeac0c7
  31};
  32
  33/*
  34 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
  35 * single-precision floating-point representation of x.
  36 */
  37static unsigned int eexp2(unsigned int s)
  38{
  39        int exp, pwr;
  40        unsigned int mant, frac;
  41
  42        /* extract exponent field from input */
  43        exp = ((s >> 23) & 0xff) - 127;
  44        if (exp > 7) {
  45                /* check for NaN input */
  46                if (exp == 128 && (s & 0x7fffff) != 0)
  47                        return s | 0x400000;    /* return QNaN */
  48                /* 2^-big = 0, 2^+big = +Inf */
  49                return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */
  50        }
  51        if (exp < -23)
  52                return 0x3f800000;      /* 1.0 */
  53
  54        /* convert to fixed point integer in 9.23 representation */
  55        pwr = (s & 0x7fffff) | 0x800000;
  56        if (exp > 0)
  57                pwr <<= exp;
  58        else
  59                pwr >>= -exp;
  60        if (s & 0x80000000)
  61                pwr = -pwr;
  62
  63        /* extract integer part, which becomes exponent part of result */
  64        exp = (pwr >> 23) + 126;
  65        if (exp >= 254)
  66                return 0x7f800000;
  67        if (exp < -23)
  68                return 0;
  69
  70        /* table lookup on top 3 bits of fraction to get mantissa */
  71        mant = exp2s[(pwr >> 20) & 7];
  72
  73        /* linear interpolation using remaining 20 bits of fraction */
  74        asm("mulhwu %0,%1,%2" : "=r" (frac)
  75            : "r" (pwr << 12), "r" (0x172b83ff));
  76        asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
  77        mant += frac;
  78
  79        if (exp >= 0)
  80                return mant + (exp << 23);
  81
  82        /* denormalized result */
  83        exp = -exp;
  84        mant += 1 << (exp - 1);
  85        return mant >> exp;
  86}
  87
  88/*
  89 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
  90 * single-precision floating-point representation of x.
  91 */
  92static unsigned int elog2(unsigned int s)
  93{
  94        int exp, mant, lz, frac;
  95
  96        exp = s & 0x7f800000;
  97        mant = s & 0x7fffff;
  98        if (exp == 0x7f800000) {        /* Inf or NaN */
  99                if (mant != 0)
 100                        s |= 0x400000;  /* turn NaN into QNaN */
 101                return s;
 102        }
 103        if ((exp | mant) == 0)          /* +0 or -0 */
 104                return 0xff800000;      /* return -Inf */
 105
 106        if (exp == 0) {
 107                /* denormalized */
 108                asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
 109                mant <<= lz - 8;
 110                exp = (-118 - lz) << 23;
 111        } else {
 112                mant |= 0x800000;
 113                exp -= 127 << 23;
 114        }
 115
 116        if (mant >= 0xb504f3) {                         /* 2^0.5 * 2^23 */
 117                exp |= 0x400000;                        /* 0.5 * 2^23 */
 118                asm("mulhwu %0,%1,%2" : "=r" (mant)
 119                    : "r" (mant), "r" (0xb504f334));    /* 2^-0.5 * 2^32 */
 120        }
 121        if (mant >= 0x9837f0) {                         /* 2^0.25 * 2^23 */
 122                exp |= 0x200000;                        /* 0.25 * 2^23 */
 123                asm("mulhwu %0,%1,%2" : "=r" (mant)
 124                    : "r" (mant), "r" (0xd744fccb));    /* 2^-0.25 * 2^32 */
 125        }
 126        if (mant >= 0x8b95c2) {                         /* 2^0.125 * 2^23 */
 127                exp |= 0x100000;                        /* 0.125 * 2^23 */
 128                asm("mulhwu %0,%1,%2" : "=r" (mant)
 129                    : "r" (mant), "r" (0xeac0c6e8));    /* 2^-0.125 * 2^32 */
 130        }
 131        if (mant > 0x800000) {                          /* 1.0 * 2^23 */
 132                /* calculate (mant - 1) * 1.381097463 */
 133                /* 1.381097463 == 0.125 / (2^0.125 - 1) */
 134                asm("mulhwu %0,%1,%2" : "=r" (frac)
 135                    : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
 136                exp += frac;
 137        }
 138        s = exp & 0x80000000;
 139        if (exp != 0) {
 140                if (s)
 141                        exp = -exp;
 142                asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
 143                lz = 8 - lz;
 144                if (lz > 0)
 145                        exp >>= lz;
 146                else if (lz < 0)
 147                        exp <<= -lz;
 148                s += ((lz + 126) << 23) + exp;
 149        }
 150        return s;
 151}
 152
 153#define VSCR_SAT        1
 154
 155static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
 156{
 157        int exp, mant;
 158
 159        exp = (x >> 23) & 0xff;
 160        mant = x & 0x7fffff;
 161        if (exp == 255 && mant != 0)
 162                return 0;               /* NaN -> 0 */
 163        exp = exp - 127 + scale;
 164        if (exp < 0)
 165                return 0;               /* round towards zero */
 166        if (exp >= 31) {
 167                /* saturate, unless the result would be -2^31 */
 168                if (x + (scale << 23) != 0xcf000000)
 169                        *vscrp |= VSCR_SAT;
 170                return (x & 0x80000000)? 0x80000000: 0x7fffffff;
 171        }
 172        mant |= 0x800000;
 173        mant = (mant << 7) >> (30 - exp);
 174        return (x & 0x80000000)? -mant: mant;
 175}
 176
 177static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
 178{
 179        int exp;
 180        unsigned int mant;
 181
 182        exp = (x >> 23) & 0xff;
 183        mant = x & 0x7fffff;
 184        if (exp == 255 && mant != 0)
 185                return 0;               /* NaN -> 0 */
 186        exp = exp - 127 + scale;
 187        if (exp < 0)
 188                return 0;               /* round towards zero */
 189        if (x & 0x80000000) {
 190                /* negative => saturate to 0 */
 191                *vscrp |= VSCR_SAT;
 192                return 0;
 193        }
 194        if (exp >= 32) {
 195                /* saturate */
 196                *vscrp |= VSCR_SAT;
 197                return 0xffffffff;
 198        }
 199        mant |= 0x800000;
 200        mant = (mant << 8) >> (31 - exp);
 201        return mant;
 202}
 203
 204/* Round to floating integer, towards 0 */
 205static unsigned int rfiz(unsigned int x)
 206{
 207        int exp;
 208
 209        exp = ((x >> 23) & 0xff) - 127;
 210        if (exp == 128 && (x & 0x7fffff) != 0)
 211                return x | 0x400000;    /* NaN -> make it a QNaN */
 212        if (exp >= 23)
 213                return x;               /* it's an integer already (or Inf) */
 214        if (exp < 0)
 215                return x & 0x80000000;  /* |x| < 1.0 rounds to 0 */
 216        return x & ~(0x7fffff >> exp);
 217}
 218
 219/* Round to floating integer, towards +/- Inf */
 220static unsigned int rfii(unsigned int x)
 221{
 222        int exp, mask;
 223
 224        exp = ((x >> 23) & 0xff) - 127;
 225        if (exp == 128 && (x & 0x7fffff) != 0)
 226                return x | 0x400000;    /* NaN -> make it a QNaN */
 227        if (exp >= 23)
 228                return x;               /* it's an integer already (or Inf) */
 229        if ((x & 0x7fffffff) == 0)
 230                return x;               /* +/-0 -> +/-0 */
 231        if (exp < 0)
 232                /* 0 < |x| < 1.0 rounds to +/- 1.0 */
 233                return (x & 0x80000000) | 0x3f800000;
 234        mask = 0x7fffff >> exp;
 235        /* mantissa overflows into exponent - that's OK,
 236           it can't overflow into the sign bit */
 237        return (x + mask) & ~mask;
 238}
 239
 240/* Round to floating integer, to nearest */
 241static unsigned int rfin(unsigned int x)
 242{
 243        int exp, half;
 244
 245        exp = ((x >> 23) & 0xff) - 127;
 246        if (exp == 128 && (x & 0x7fffff) != 0)
 247                return x | 0x400000;    /* NaN -> make it a QNaN */
 248        if (exp >= 23)
 249                return x;               /* it's an integer already (or Inf) */
 250        if (exp < -1)
 251                return x & 0x80000000;  /* |x| < 0.5 -> +/-0 */
 252        if (exp == -1)
 253                /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
 254                return (x & 0x80000000) | 0x3f800000;
 255        half = 0x400000 >> exp;
 256        /* add 0.5 to the magnitude and chop off the fraction bits */
 257        return (x + half) & ~(0x7fffff >> exp);
 258}
 259
 260int emulate_altivec(struct pt_regs *regs)
 261{
 262        unsigned int instr, i;
 263        unsigned int va, vb, vc, vd;
 264        vector128 *vrs;
 265
 266        if (get_user(instr, (unsigned int __user *) regs->nip))
 267                return -EFAULT;
 268        if ((instr >> 26) != 4)
 269                return -EINVAL;         /* not an altivec instruction */
 270        vd = (instr >> 21) & 0x1f;
 271        va = (instr >> 16) & 0x1f;
 272        vb = (instr >> 11) & 0x1f;
 273        vc = (instr >> 6) & 0x1f;
 274
 275        vrs = current->thread.vr_state.vr;
 276        switch (instr & 0x3f) {
 277        case 10:
 278                switch (vc) {
 279                case 0: /* vaddfp */
 280                        vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
 281                        break;
 282                case 1: /* vsubfp */
 283                        vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
 284                        break;
 285                case 4: /* vrefp */
 286                        vrefp(&vrs[vd], &vrs[vb]);
 287                        break;
 288                case 5: /* vrsqrtefp */
 289                        vrsqrtefp(&vrs[vd], &vrs[vb]);
 290                        break;
 291                case 6: /* vexptefp */
 292                        for (i = 0; i < 4; ++i)
 293                                vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
 294                        break;
 295                case 7: /* vlogefp */
 296                        for (i = 0; i < 4; ++i)
 297                                vrs[vd].u[i] = elog2(vrs[vb].u[i]);
 298                        break;
 299                case 8:         /* vrfin */
 300                        for (i = 0; i < 4; ++i)
 301                                vrs[vd].u[i] = rfin(vrs[vb].u[i]);
 302                        break;
 303                case 9:         /* vrfiz */
 304                        for (i = 0; i < 4; ++i)
 305                                vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
 306                        break;
 307                case 10:        /* vrfip */
 308                        for (i = 0; i < 4; ++i) {
 309                                u32 x = vrs[vb].u[i];
 310                                x = (x & 0x80000000)? rfiz(x): rfii(x);
 311                                vrs[vd].u[i] = x;
 312                        }
 313                        break;
 314                case 11:        /* vrfim */
 315                        for (i = 0; i < 4; ++i) {
 316                                u32 x = vrs[vb].u[i];
 317                                x = (x & 0x80000000)? rfii(x): rfiz(x);
 318                                vrs[vd].u[i] = x;
 319                        }
 320                        break;
 321                case 14:        /* vctuxs */
 322                        for (i = 0; i < 4; ++i)
 323                                vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
 324                                        &current->thread.vr_state.vscr.u[3]);
 325                        break;
 326                case 15:        /* vctsxs */
 327                        for (i = 0; i < 4; ++i)
 328                                vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
 329                                        &current->thread.vr_state.vscr.u[3]);
 330                        break;
 331                default:
 332                        return -EINVAL;
 333                }
 334                break;
 335        case 46:        /* vmaddfp */
 336                vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
 337                break;
 338        case 47:        /* vnmsubfp */
 339                vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
 340                break;
 341        default:
 342                return -EINVAL;
 343        }
 344
 345        return 0;
 346}
 347