qemu/target/hexagon/arch.c
<<
>>
Prefs
   1/*
   2 *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
   3 *
   4 *  This program is free software; you can redistribute it and/or modify
   5 *  it under the terms of the GNU General Public License as published by
   6 *  the Free Software Foundation; either version 2 of the License, or
   7 *  (at your option) any later version.
   8 *
   9 *  This program is distributed in the hope that it will be useful,
  10 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 *  GNU General Public License for more details.
  13 *
  14 *  You should have received a copy of the GNU General Public License
  15 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "fpu/softfloat.h"
  20#include "cpu.h"
  21#include "fma_emu.h"
  22#include "arch.h"
  23#include "macros.h"
  24
  25#define SF_BIAS        127
  26#define SF_MAXEXP      254
  27#define SF_MANTBITS    23
  28#define float32_nan    make_float32(0xffffffff)
  29
  30/*
  31 * These three tables are used by the cabacdecbin instruction
  32 */
  33const uint8_t rLPS_table_64x4[64][4] = {
  34    {128, 176, 208, 240},
  35    {128, 167, 197, 227},
  36    {128, 158, 187, 216},
  37    {123, 150, 178, 205},
  38    {116, 142, 169, 195},
  39    {111, 135, 160, 185},
  40    {105, 128, 152, 175},
  41    {100, 122, 144, 166},
  42    {95, 116, 137, 158},
  43    {90, 110, 130, 150},
  44    {85, 104, 123, 142},
  45    {81, 99, 117, 135},
  46    {77, 94, 111, 128},
  47    {73, 89, 105, 122},
  48    {69, 85, 100, 116},
  49    {66, 80, 95, 110},
  50    {62, 76, 90, 104},
  51    {59, 72, 86, 99},
  52    {56, 69, 81, 94},
  53    {53, 65, 77, 89},
  54    {51, 62, 73, 85},
  55    {48, 59, 69, 80},
  56    {46, 56, 66, 76},
  57    {43, 53, 63, 72},
  58    {41, 50, 59, 69},
  59    {39, 48, 56, 65},
  60    {37, 45, 54, 62},
  61    {35, 43, 51, 59},
  62    {33, 41, 48, 56},
  63    {32, 39, 46, 53},
  64    {30, 37, 43, 50},
  65    {29, 35, 41, 48},
  66    {27, 33, 39, 45},
  67    {26, 31, 37, 43},
  68    {24, 30, 35, 41},
  69    {23, 28, 33, 39},
  70    {22, 27, 32, 37},
  71    {21, 26, 30, 35},
  72    {20, 24, 29, 33},
  73    {19, 23, 27, 31},
  74    {18, 22, 26, 30},
  75    {17, 21, 25, 28},
  76    {16, 20, 23, 27},
  77    {15, 19, 22, 25},
  78    {14, 18, 21, 24},
  79    {14, 17, 20, 23},
  80    {13, 16, 19, 22},
  81    {12, 15, 18, 21},
  82    {12, 14, 17, 20},
  83    {11, 14, 16, 19},
  84    {11, 13, 15, 18},
  85    {10, 12, 15, 17},
  86    {10, 12, 14, 16},
  87    {9, 11, 13, 15},
  88    {9, 11, 12, 14},
  89    {8, 10, 12, 14},
  90    {8, 9, 11, 13},
  91    {7, 9, 11, 12},
  92    {7, 9, 10, 12},
  93    {7, 8, 10, 11},
  94    {6, 8, 9, 11},
  95    {6, 7, 9, 10},
  96    {6, 7, 8, 9},
  97    {2, 2, 2, 2}
  98};
  99
 100const uint8_t AC_next_state_MPS_64[64] = {
 101    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
 102    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
 103    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
 104    31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
 105    41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
 106    51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
 107    61, 62, 62, 63
 108};
 109
 110
 111const uint8_t AC_next_state_LPS_64[64] = {
 112    0, 0, 1, 2, 2, 4, 4, 5, 6, 7,
 113    8, 9, 9, 11, 11, 12, 13, 13, 15, 15,
 114    16, 16, 18, 18, 19, 19, 21, 21, 22, 22,
 115    23, 24, 24, 25, 26, 26, 27, 27, 28, 29,
 116    29, 30, 30, 30, 31, 32, 32, 33, 33, 33,
 117    34, 34, 35, 35, 35, 36, 36, 36, 37, 37,
 118    37, 38, 38, 63
 119};
 120
 121#define BITS_MASK_8 0x5555555555555555ULL
 122#define PAIR_MASK_8 0x3333333333333333ULL
 123#define NYBL_MASK_8 0x0f0f0f0f0f0f0f0fULL
 124#define BYTE_MASK_8 0x00ff00ff00ff00ffULL
 125#define HALF_MASK_8 0x0000ffff0000ffffULL
 126#define WORD_MASK_8 0x00000000ffffffffULL
 127
 128uint64_t interleave(uint32_t odd, uint32_t even)
 129{
 130    /* Convert to long long */
 131    uint64_t myodd = odd;
 132    uint64_t myeven = even;
 133    /* First, spread bits out */
 134    myodd = (myodd | (myodd << 16)) & HALF_MASK_8;
 135    myeven = (myeven | (myeven << 16)) & HALF_MASK_8;
 136    myodd = (myodd | (myodd << 8)) & BYTE_MASK_8;
 137    myeven = (myeven | (myeven << 8)) & BYTE_MASK_8;
 138    myodd = (myodd | (myodd << 4)) & NYBL_MASK_8;
 139    myeven = (myeven | (myeven << 4)) & NYBL_MASK_8;
 140    myodd = (myodd | (myodd << 2)) & PAIR_MASK_8;
 141    myeven = (myeven | (myeven << 2)) & PAIR_MASK_8;
 142    myodd = (myodd | (myodd << 1)) & BITS_MASK_8;
 143    myeven = (myeven | (myeven << 1)) & BITS_MASK_8;
 144    /* Now OR together */
 145    return myeven | (myodd << 1);
 146}
 147
 148uint64_t deinterleave(uint64_t src)
 149{
 150    /* Get odd and even bits */
 151    uint64_t myodd = ((src >> 1) & BITS_MASK_8);
 152    uint64_t myeven = (src & BITS_MASK_8);
 153
 154    /* Unspread bits */
 155    myeven = (myeven | (myeven >> 1)) & PAIR_MASK_8;
 156    myodd = (myodd | (myodd >> 1)) & PAIR_MASK_8;
 157    myeven = (myeven | (myeven >> 2)) & NYBL_MASK_8;
 158    myodd = (myodd | (myodd >> 2)) & NYBL_MASK_8;
 159    myeven = (myeven | (myeven >> 4)) & BYTE_MASK_8;
 160    myodd = (myodd | (myodd >> 4)) & BYTE_MASK_8;
 161    myeven = (myeven | (myeven >> 8)) & HALF_MASK_8;
 162    myodd = (myodd | (myodd >> 8)) & HALF_MASK_8;
 163    myeven = (myeven | (myeven >> 16)) & WORD_MASK_8;
 164    myodd = (myodd | (myodd >> 16)) & WORD_MASK_8;
 165
 166    /* Return odd bits in upper half */
 167    return myeven | (myodd << 32);
 168}
 169
 170int32_t conv_round(int32_t a, int n)
 171{
 172    int64_t val;
 173
 174    if (n == 0) {
 175        val = a;
 176    } else if ((a & ((1 << (n - 1)) - 1)) == 0) {    /* N-1..0 all zero? */
 177        /* Add LSB from int part */
 178        val = ((fSE32_64(a)) + (int64_t) (((uint32_t) ((1 << n) & a)) >> 1));
 179    } else {
 180        val = ((fSE32_64(a)) + (1 << (n - 1)));
 181    }
 182
 183    val = val >> n;
 184    return (int32_t)val;
 185}
 186
 187/* Floating Point Stuff */
 188
 189static const FloatRoundMode softfloat_roundingmodes[] = {
 190    float_round_nearest_even,
 191    float_round_to_zero,
 192    float_round_down,
 193    float_round_up,
 194};
 195
 196void arch_fpop_start(CPUHexagonState *env)
 197{
 198    set_float_exception_flags(0, &env->fp_status);
 199    set_float_rounding_mode(
 200        softfloat_roundingmodes[fREAD_REG_FIELD(USR, USR_FPRND)],
 201        &env->fp_status);
 202}
 203
 204#ifdef CONFIG_USER_ONLY
 205/*
 206 * Hexagon Linux kernel only sets the relevant bits in USR (user status
 207 * register).  The exception isn't raised to user mode, so we don't
 208 * model it in qemu user mode.
 209 */
 210#define RAISE_FP_EXCEPTION   do {} while (0)
 211#endif
 212
 213#define SOFTFLOAT_TEST_FLAG(FLAG, MYF, MYE) \
 214    do { \
 215        if (flags & FLAG) { \
 216            if (GET_USR_FIELD(USR_##MYF) == 0) { \
 217                SET_USR_FIELD(USR_##MYF, 1); \
 218                if (GET_USR_FIELD(USR_##MYE)) { \
 219                    RAISE_FP_EXCEPTION; \
 220                } \
 221            } \
 222        } \
 223    } while (0)
 224
 225void arch_fpop_end(CPUHexagonState *env)
 226{
 227    int flags = get_float_exception_flags(&env->fp_status);
 228    if (flags != 0) {
 229        SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE);
 230        SOFTFLOAT_TEST_FLAG(float_flag_divbyzero, FPDBZF, FPDBZE);
 231        SOFTFLOAT_TEST_FLAG(float_flag_invalid, FPINVF, FPINVE);
 232        SOFTFLOAT_TEST_FLAG(float_flag_overflow, FPOVFF, FPOVFE);
 233        SOFTFLOAT_TEST_FLAG(float_flag_underflow, FPUNFF, FPUNFE);
 234    }
 235}
 236
 237int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust,
 238                         float_status *fp_status)
 239{
 240    int n_exp;
 241    int d_exp;
 242    int ret = 0;
 243    float32 RsV, RtV, RdV;
 244    int PeV = 0;
 245    RsV = *Rs;
 246    RtV = *Rt;
 247    if (float32_is_any_nan(RsV) && float32_is_any_nan(RtV)) {
 248        if (extract32(RsV & RtV, 22, 1) == 0) {
 249            float_raise(float_flag_invalid, fp_status);
 250        }
 251        RdV = RsV = RtV = float32_nan;
 252    } else if (float32_is_any_nan(RsV)) {
 253        if (extract32(RsV, 22, 1) == 0) {
 254            float_raise(float_flag_invalid, fp_status);
 255        }
 256        RdV = RsV = RtV = float32_nan;
 257    } else if (float32_is_any_nan(RtV)) {
 258        /* or put NaN in num/den fixup? */
 259        if (extract32(RtV, 22, 1) == 0) {
 260            float_raise(float_flag_invalid, fp_status);
 261        }
 262        RdV = RsV = RtV = float32_nan;
 263    } else if (float32_is_infinity(RsV) && float32_is_infinity(RtV)) {
 264        /* or put Inf in num fixup? */
 265        RdV = RsV = RtV = float32_nan;
 266        float_raise(float_flag_invalid, fp_status);
 267    } else if (float32_is_zero(RsV) && float32_is_zero(RtV)) {
 268        /* or put zero in num fixup? */
 269        RdV = RsV = RtV = float32_nan;
 270        float_raise(float_flag_invalid, fp_status);
 271    } else if (float32_is_zero(RtV)) {
 272        /* or put Inf in num fixup? */
 273        uint8_t RsV_sign = float32_is_neg(RsV);
 274        uint8_t RtV_sign = float32_is_neg(RtV);
 275        /* Check that RsV is NOT infinite before we overwrite it */
 276        if (!float32_is_infinity(RsV)) {
 277            float_raise(float_flag_divbyzero, fp_status);
 278        }
 279        RsV = infinite_float32(RsV_sign ^ RtV_sign);
 280        RtV = float32_one;
 281        RdV = float32_one;
 282    } else if (float32_is_infinity(RtV)) {
 283        RsV = make_float32(0x80000000 & (RsV ^ RtV));
 284        RtV = float32_one;
 285        RdV = float32_one;
 286    } else if (float32_is_zero(RsV)) {
 287        /* Does this just work itself out? */
 288        /* No, 0/Inf causes problems. */
 289        RsV = make_float32(0x80000000 & (RsV ^ RtV));
 290        RtV = float32_one;
 291        RdV = float32_one;
 292    } else if (float32_is_infinity(RsV)) {
 293        uint8_t RsV_sign = float32_is_neg(RsV);
 294        uint8_t RtV_sign = float32_is_neg(RtV);
 295        RsV = infinite_float32(RsV_sign ^ RtV_sign);
 296        RtV = float32_one;
 297        RdV = float32_one;
 298    } else {
 299        PeV = 0x00;
 300        /* Basic checks passed */
 301        n_exp = float32_getexp(RsV);
 302        d_exp = float32_getexp(RtV);
 303        if ((n_exp - d_exp + SF_BIAS) <= SF_MANTBITS) {
 304            /* Near quotient underflow / inexact Q */
 305            PeV = 0x80;
 306            RtV = float32_scalbn(RtV, -64, fp_status);
 307            RsV = float32_scalbn(RsV, 64, fp_status);
 308        } else if ((n_exp - d_exp + SF_BIAS) > (SF_MAXEXP - 24)) {
 309            /* Near quotient overflow */
 310            PeV = 0x40;
 311            RtV = float32_scalbn(RtV, 32, fp_status);
 312            RsV = float32_scalbn(RsV, -32, fp_status);
 313        } else if (n_exp <= SF_MANTBITS + 2) {
 314            RtV = float32_scalbn(RtV, 64, fp_status);
 315            RsV = float32_scalbn(RsV, 64, fp_status);
 316        } else if (d_exp <= 1) {
 317            RtV = float32_scalbn(RtV, 32, fp_status);
 318            RsV = float32_scalbn(RsV, 32, fp_status);
 319        } else if (d_exp > 252) {
 320            RtV = float32_scalbn(RtV, -32, fp_status);
 321            RsV = float32_scalbn(RsV, -32, fp_status);
 322        }
 323        RdV = 0;
 324        ret = 1;
 325    }
 326    *Rs = RsV;
 327    *Rt = RtV;
 328    *Rd = RdV;
 329    *adjust = PeV;
 330    return ret;
 331}
 332
 333int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust,
 334                           float_status *fp_status)
 335{
 336    float32 RsV, RdV;
 337    int PeV = 0;
 338    int r_exp;
 339    int ret = 0;
 340    RsV = *Rs;
 341    if (float32_is_any_nan(RsV)) {
 342        if (extract32(RsV, 22, 1) == 0) {
 343            float_raise(float_flag_invalid, fp_status);
 344        }
 345        RdV = RsV = float32_nan;
 346    } else if (float32_lt(RsV, float32_zero, fp_status)) {
 347        /* Negative nonzero values are NaN */
 348        float_raise(float_flag_invalid, fp_status);
 349        RsV = float32_nan;
 350        RdV = float32_nan;
 351    } else if (float32_is_infinity(RsV)) {
 352        /* or put Inf in num fixup? */
 353        RsV = infinite_float32(1);
 354        RdV = infinite_float32(1);
 355    } else if (float32_is_zero(RsV)) {
 356        /* or put zero in num fixup? */
 357        RdV = float32_one;
 358    } else {
 359        PeV = 0x00;
 360        /* Basic checks passed */
 361        r_exp = float32_getexp(RsV);
 362        if (r_exp <= 24) {
 363            RsV = float32_scalbn(RsV, 64, fp_status);
 364            PeV = 0xe0;
 365        }
 366        RdV = 0;
 367        ret = 1;
 368    }
 369    *Rs = RsV;
 370    *Rd = RdV;
 371    *adjust = PeV;
 372    return ret;
 373}
 374
 375const uint8_t recip_lookup_table[128] = {
 376    0x0fe, 0x0fa, 0x0f6, 0x0f2, 0x0ef, 0x0eb, 0x0e7, 0x0e4,
 377    0x0e0, 0x0dd, 0x0d9, 0x0d6, 0x0d2, 0x0cf, 0x0cc, 0x0c9,
 378    0x0c6, 0x0c2, 0x0bf, 0x0bc, 0x0b9, 0x0b6, 0x0b3, 0x0b1,
 379    0x0ae, 0x0ab, 0x0a8, 0x0a5, 0x0a3, 0x0a0, 0x09d, 0x09b,
 380    0x098, 0x096, 0x093, 0x091, 0x08e, 0x08c, 0x08a, 0x087,
 381    0x085, 0x083, 0x080, 0x07e, 0x07c, 0x07a, 0x078, 0x075,
 382    0x073, 0x071, 0x06f, 0x06d, 0x06b, 0x069, 0x067, 0x065,
 383    0x063, 0x061, 0x05f, 0x05e, 0x05c, 0x05a, 0x058, 0x056,
 384    0x054, 0x053, 0x051, 0x04f, 0x04e, 0x04c, 0x04a, 0x049,
 385    0x047, 0x045, 0x044, 0x042, 0x040, 0x03f, 0x03d, 0x03c,
 386    0x03a, 0x039, 0x037, 0x036, 0x034, 0x033, 0x032, 0x030,
 387    0x02f, 0x02d, 0x02c, 0x02b, 0x029, 0x028, 0x027, 0x025,
 388    0x024, 0x023, 0x021, 0x020, 0x01f, 0x01e, 0x01c, 0x01b,
 389    0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x013, 0x012,
 390    0x011, 0x00f, 0x00e, 0x00d, 0x00c, 0x00b, 0x00a, 0x009,
 391    0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x000,
 392};
 393
 394const uint8_t invsqrt_lookup_table[128] = {
 395    0x069, 0x066, 0x063, 0x061, 0x05e, 0x05b, 0x059, 0x057,
 396    0x054, 0x052, 0x050, 0x04d, 0x04b, 0x049, 0x047, 0x045,
 397    0x043, 0x041, 0x03f, 0x03d, 0x03b, 0x039, 0x037, 0x036,
 398    0x034, 0x032, 0x030, 0x02f, 0x02d, 0x02c, 0x02a, 0x028,
 399    0x027, 0x025, 0x024, 0x022, 0x021, 0x01f, 0x01e, 0x01d,
 400    0x01b, 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x012,
 401    0x011, 0x010, 0x00f, 0x00d, 0x00c, 0x00b, 0x00a, 0x009,
 402    0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x001,
 403    0x0fe, 0x0fa, 0x0f6, 0x0f3, 0x0ef, 0x0eb, 0x0e8, 0x0e4,
 404    0x0e1, 0x0de, 0x0db, 0x0d7, 0x0d4, 0x0d1, 0x0ce, 0x0cb,
 405    0x0c9, 0x0c6, 0x0c3, 0x0c0, 0x0be, 0x0bb, 0x0b8, 0x0b6,
 406    0x0b3, 0x0b1, 0x0af, 0x0ac, 0x0aa, 0x0a8, 0x0a5, 0x0a3,
 407    0x0a1, 0x09f, 0x09d, 0x09b, 0x099, 0x097, 0x095, 0x093,
 408    0x091, 0x08f, 0x08d, 0x08b, 0x089, 0x087, 0x086, 0x084,
 409    0x082, 0x080, 0x07f, 0x07d, 0x07b, 0x07a, 0x078, 0x077,
 410    0x075, 0x074, 0x072, 0x071, 0x06f, 0x06e, 0x06c, 0x06b,
 411};
 412