linux/arch/powerpc/kernel/vecemu.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Routines to emulate some Altivec/VMX instructions, specifically
   4 * those that can trap when given denormalized operands in Java mode.
   5 */
   6#include <linux/kernel.h>
   7#include <linux/errno.h>
   8#include <linux/sched.h>
   9#include <asm/ptrace.h>
  10#include <asm/processor.h>
  11#include <asm/switch_to.h>
  12#include <linux/uaccess.h>
  13#include <asm/inst.h>
  14
  15/* Functions in vector.S */
  16extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
  17extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
  18extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
  19extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
  20extern void vrefp(vector128 *dst, vector128 *src);
  21extern void vrsqrtefp(vector128 *dst, vector128 *src);
  22extern void vexptep(vector128 *dst, vector128 *src);
  23
  24static unsigned int exp2s[8] = {
  25        0x800000,
  26        0x8b95c2,
  27        0x9837f0,
  28        0xa5fed7,
  29        0xb504f3,
  30        0xc5672a,
  31        0xd744fd,
  32        0xeac0c7
  33};
  34
  35/*
  36 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
  37 * single-precision floating-point representation of x.
  38 */
  39static unsigned int eexp2(unsigned int s)
  40{
  41        int exp, pwr;
  42        unsigned int mant, frac;
  43
  44        /* extract exponent field from input */
  45        exp = ((s >> 23) & 0xff) - 127;
  46        if (exp > 7) {
  47                /* check for NaN input */
  48                if (exp == 128 && (s & 0x7fffff) != 0)
  49                        return s | 0x400000;    /* return QNaN */
  50                /* 2^-big = 0, 2^+big = +Inf */
  51                return (s & 0x80000000)? 0: 0x7f800000; /* 0 or +Inf */
  52        }
  53        if (exp < -23)
  54                return 0x3f800000;      /* 1.0 */
  55
  56        /* convert to fixed point integer in 9.23 representation */
  57        pwr = (s & 0x7fffff) | 0x800000;
  58        if (exp > 0)
  59                pwr <<= exp;
  60        else
  61                pwr >>= -exp;
  62        if (s & 0x80000000)
  63                pwr = -pwr;
  64
  65        /* extract integer part, which becomes exponent part of result */
  66        exp = (pwr >> 23) + 126;
  67        if (exp >= 254)
  68                return 0x7f800000;
  69        if (exp < -23)
  70                return 0;
  71
  72        /* table lookup on top 3 bits of fraction to get mantissa */
  73        mant = exp2s[(pwr >> 20) & 7];
  74
  75        /* linear interpolation using remaining 20 bits of fraction */
  76        asm("mulhwu %0,%1,%2" : "=r" (frac)
  77            : "r" (pwr << 12), "r" (0x172b83ff));
  78        asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
  79        mant += frac;
  80
  81        if (exp >= 0)
  82                return mant + (exp << 23);
  83
  84        /* denormalized result */
  85        exp = -exp;
  86        mant += 1 << (exp - 1);
  87        return mant >> exp;
  88}
  89
  90/*
  91 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
  92 * single-precision floating-point representation of x.
  93 */
  94static unsigned int elog2(unsigned int s)
  95{
  96        int exp, mant, lz, frac;
  97
  98        exp = s & 0x7f800000;
  99        mant = s & 0x7fffff;
 100        if (exp == 0x7f800000) {        /* Inf or NaN */
 101                if (mant != 0)
 102                        s |= 0x400000;  /* turn NaN into QNaN */
 103                return s;
 104        }
 105        if ((exp | mant) == 0)          /* +0 or -0 */
 106                return 0xff800000;      /* return -Inf */
 107
 108        if (exp == 0) {
 109                /* denormalized */
 110                asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
 111                mant <<= lz - 8;
 112                exp = (-118 - lz) << 23;
 113        } else {
 114                mant |= 0x800000;
 115                exp -= 127 << 23;
 116        }
 117
 118        if (mant >= 0xb504f3) {                         /* 2^0.5 * 2^23 */
 119                exp |= 0x400000;                        /* 0.5 * 2^23 */
 120                asm("mulhwu %0,%1,%2" : "=r" (mant)
 121                    : "r" (mant), "r" (0xb504f334));    /* 2^-0.5 * 2^32 */
 122        }
 123        if (mant >= 0x9837f0) {                         /* 2^0.25 * 2^23 */
 124                exp |= 0x200000;                        /* 0.25 * 2^23 */
 125                asm("mulhwu %0,%1,%2" : "=r" (mant)
 126                    : "r" (mant), "r" (0xd744fccb));    /* 2^-0.25 * 2^32 */
 127        }
 128        if (mant >= 0x8b95c2) {                         /* 2^0.125 * 2^23 */
 129                exp |= 0x100000;                        /* 0.125 * 2^23 */
 130                asm("mulhwu %0,%1,%2" : "=r" (mant)
 131                    : "r" (mant), "r" (0xeac0c6e8));    /* 2^-0.125 * 2^32 */
 132        }
 133        if (mant > 0x800000) {                          /* 1.0 * 2^23 */
 134                /* calculate (mant - 1) * 1.381097463 */
 135                /* 1.381097463 == 0.125 / (2^0.125 - 1) */
 136                asm("mulhwu %0,%1,%2" : "=r" (frac)
 137                    : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
 138                exp += frac;
 139        }
 140        s = exp & 0x80000000;
 141        if (exp != 0) {
 142                if (s)
 143                        exp = -exp;
 144                asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
 145                lz = 8 - lz;
 146                if (lz > 0)
 147                        exp >>= lz;
 148                else if (lz < 0)
 149                        exp <<= -lz;
 150                s += ((lz + 126) << 23) + exp;
 151        }
 152        return s;
 153}
 154
 155#define VSCR_SAT        1
 156
 157static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
 158{
 159        int exp, mant;
 160
 161        exp = (x >> 23) & 0xff;
 162        mant = x & 0x7fffff;
 163        if (exp == 255 && mant != 0)
 164                return 0;               /* NaN -> 0 */
 165        exp = exp - 127 + scale;
 166        if (exp < 0)
 167                return 0;               /* round towards zero */
 168        if (exp >= 31) {
 169                /* saturate, unless the result would be -2^31 */
 170                if (x + (scale << 23) != 0xcf000000)
 171                        *vscrp |= VSCR_SAT;
 172                return (x & 0x80000000)? 0x80000000: 0x7fffffff;
 173        }
 174        mant |= 0x800000;
 175        mant = (mant << 7) >> (30 - exp);
 176        return (x & 0x80000000)? -mant: mant;
 177}
 178
 179static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
 180{
 181        int exp;
 182        unsigned int mant;
 183
 184        exp = (x >> 23) & 0xff;
 185        mant = x & 0x7fffff;
 186        if (exp == 255 && mant != 0)
 187                return 0;               /* NaN -> 0 */
 188        exp = exp - 127 + scale;
 189        if (exp < 0)
 190                return 0;               /* round towards zero */
 191        if (x & 0x80000000) {
 192                /* negative => saturate to 0 */
 193                *vscrp |= VSCR_SAT;
 194                return 0;
 195        }
 196        if (exp >= 32) {
 197                /* saturate */
 198                *vscrp |= VSCR_SAT;
 199                return 0xffffffff;
 200        }
 201        mant |= 0x800000;
 202        mant = (mant << 8) >> (31 - exp);
 203        return mant;
 204}
 205
 206/* Round to floating integer, towards 0 */
 207static unsigned int rfiz(unsigned int x)
 208{
 209        int exp;
 210
 211        exp = ((x >> 23) & 0xff) - 127;
 212        if (exp == 128 && (x & 0x7fffff) != 0)
 213                return x | 0x400000;    /* NaN -> make it a QNaN */
 214        if (exp >= 23)
 215                return x;               /* it's an integer already (or Inf) */
 216        if (exp < 0)
 217                return x & 0x80000000;  /* |x| < 1.0 rounds to 0 */
 218        return x & ~(0x7fffff >> exp);
 219}
 220
 221/* Round to floating integer, towards +/- Inf */
 222static unsigned int rfii(unsigned int x)
 223{
 224        int exp, mask;
 225
 226        exp = ((x >> 23) & 0xff) - 127;
 227        if (exp == 128 && (x & 0x7fffff) != 0)
 228                return x | 0x400000;    /* NaN -> make it a QNaN */
 229        if (exp >= 23)
 230                return x;               /* it's an integer already (or Inf) */
 231        if ((x & 0x7fffffff) == 0)
 232                return x;               /* +/-0 -> +/-0 */
 233        if (exp < 0)
 234                /* 0 < |x| < 1.0 rounds to +/- 1.0 */
 235                return (x & 0x80000000) | 0x3f800000;
 236        mask = 0x7fffff >> exp;
 237        /* mantissa overflows into exponent - that's OK,
 238           it can't overflow into the sign bit */
 239        return (x + mask) & ~mask;
 240}
 241
 242/* Round to floating integer, to nearest */
 243static unsigned int rfin(unsigned int x)
 244{
 245        int exp, half;
 246
 247        exp = ((x >> 23) & 0xff) - 127;
 248        if (exp == 128 && (x & 0x7fffff) != 0)
 249                return x | 0x400000;    /* NaN -> make it a QNaN */
 250        if (exp >= 23)
 251                return x;               /* it's an integer already (or Inf) */
 252        if (exp < -1)
 253                return x & 0x80000000;  /* |x| < 0.5 -> +/-0 */
 254        if (exp == -1)
 255                /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
 256                return (x & 0x80000000) | 0x3f800000;
 257        half = 0x400000 >> exp;
 258        /* add 0.5 to the magnitude and chop off the fraction bits */
 259        return (x + half) & ~(0x7fffff >> exp);
 260}
 261
 262int emulate_altivec(struct pt_regs *regs)
 263{
 264        struct ppc_inst instr;
 265        unsigned int i, word;
 266        unsigned int va, vb, vc, vd;
 267        vector128 *vrs;
 268
 269        if (get_user_instr(instr, (void __user *)regs->nip))
 270                return -EFAULT;
 271
 272        word = ppc_inst_val(instr);
 273        if (ppc_inst_primary_opcode(instr) != 4)
 274                return -EINVAL;         /* not an altivec instruction */
 275        vd = (word >> 21) & 0x1f;
 276        va = (word >> 16) & 0x1f;
 277        vb = (word >> 11) & 0x1f;
 278        vc = (word >> 6) & 0x1f;
 279
 280        vrs = current->thread.vr_state.vr;
 281        switch (word & 0x3f) {
 282        case 10:
 283                switch (vc) {
 284                case 0: /* vaddfp */
 285                        vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
 286                        break;
 287                case 1: /* vsubfp */
 288                        vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
 289                        break;
 290                case 4: /* vrefp */
 291                        vrefp(&vrs[vd], &vrs[vb]);
 292                        break;
 293                case 5: /* vrsqrtefp */
 294                        vrsqrtefp(&vrs[vd], &vrs[vb]);
 295                        break;
 296                case 6: /* vexptefp */
 297                        for (i = 0; i < 4; ++i)
 298                                vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
 299                        break;
 300                case 7: /* vlogefp */
 301                        for (i = 0; i < 4; ++i)
 302                                vrs[vd].u[i] = elog2(vrs[vb].u[i]);
 303                        break;
 304                case 8:         /* vrfin */
 305                        for (i = 0; i < 4; ++i)
 306                                vrs[vd].u[i] = rfin(vrs[vb].u[i]);
 307                        break;
 308                case 9:         /* vrfiz */
 309                        for (i = 0; i < 4; ++i)
 310                                vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
 311                        break;
 312                case 10:        /* vrfip */
 313                        for (i = 0; i < 4; ++i) {
 314                                u32 x = vrs[vb].u[i];
 315                                x = (x & 0x80000000)? rfiz(x): rfii(x);
 316                                vrs[vd].u[i] = x;
 317                        }
 318                        break;
 319                case 11:        /* vrfim */
 320                        for (i = 0; i < 4; ++i) {
 321                                u32 x = vrs[vb].u[i];
 322                                x = (x & 0x80000000)? rfii(x): rfiz(x);
 323                                vrs[vd].u[i] = x;
 324                        }
 325                        break;
 326                case 14:        /* vctuxs */
 327                        for (i = 0; i < 4; ++i)
 328                                vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
 329                                        &current->thread.vr_state.vscr.u[3]);
 330                        break;
 331                case 15:        /* vctsxs */
 332                        for (i = 0; i < 4; ++i)
 333                                vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
 334                                        &current->thread.vr_state.vscr.u[3]);
 335                        break;
 336                default:
 337                        return -EINVAL;
 338                }
 339                break;
 340        case 46:        /* vmaddfp */
 341                vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
 342                break;
 343        case 47:        /* vnmsubfp */
 344                vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
 345                break;
 346        default:
 347                return -EINVAL;
 348        }
 349
 350        return 0;
 351}
 352