/* qemu/target/hexagon/arch.c */
   1/*
   2 *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
   3 *
   4 *  This program is free software; you can redistribute it and/or modify
   5 *  it under the terms of the GNU General Public License as published by
   6 *  the Free Software Foundation; either version 2 of the License, or
   7 *  (at your option) any later version.
   8 *
   9 *  This program is distributed in the hope that it will be useful,
  10 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 *  GNU General Public License for more details.
  13 *
  14 *  You should have received a copy of the GNU General Public License
  15 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  16 */
  17
  18#include "qemu/osdep.h"
  19#include "fpu/softfloat.h"
  20#include "cpu.h"
  21#include "fma_emu.h"
  22#include "arch.h"
  23#include "macros.h"
  24
/*
 * Single-precision parameters used by arch_sf_recip_common() when it
 * classifies the exponent of the quotient.
 * NOTE(review): the values match the IEEE 754 binary32 exponent bias,
 * largest finite biased exponent and fraction width -- confirm that this
 * is the intended meaning.
 */
#define SF_BIAS        127
#define SF_MAXEXP      254
#define SF_MANTBITS    23
/* All-ones bit pattern; the value the fixup helpers hand back for NaN. */
#define float32_nan    make_float32(0xffffffff)
  29
/*
 * These three tables (rLPS_table_64x4, AC_next_state_MPS_64 and
 * AC_next_state_LPS_64) are used by the cabacdecbin instruction.
 *
 * rLPS_table_64x4 holds 64 rows of four byte-sized entries.
 * NOTE(review): presumably the row is selected by the current CABAC
 * probability state and the column by the quantized range -- confirm
 * against the cabacdecbin implementation.
 */
const uint8_t rLPS_table_64x4[64][4] = {
    {128, 176, 208, 240},
    {128, 167, 197, 227},
    {128, 158, 187, 216},
    {123, 150, 178, 205},
    {116, 142, 169, 195},
    {111, 135, 160, 185},
    {105, 128, 152, 175},
    {100, 122, 144, 166},
    {95, 116, 137, 158},
    {90, 110, 130, 150},
    {85, 104, 123, 142},
    {81, 99, 117, 135},
    {77, 94, 111, 128},
    {73, 89, 105, 122},
    {69, 85, 100, 116},
    {66, 80, 95, 110},
    {62, 76, 90, 104},
    {59, 72, 86, 99},
    {56, 69, 81, 94},
    {53, 65, 77, 89},
    {51, 62, 73, 85},
    {48, 59, 69, 80},
    {46, 56, 66, 76},
    {43, 53, 63, 72},
    {41, 50, 59, 69},
    {39, 48, 56, 65},
    {37, 45, 54, 62},
    {35, 43, 51, 59},
    {33, 41, 48, 56},
    {32, 39, 46, 53},
    {30, 37, 43, 50},
    {29, 35, 41, 48},
    {27, 33, 39, 45},
    {26, 31, 37, 43},
    {24, 30, 35, 41},
    {23, 28, 33, 39},
    {22, 27, 32, 37},
    {21, 26, 30, 35},
    {20, 24, 29, 33},
    {19, 23, 27, 31},
    {18, 22, 26, 30},
    {17, 21, 25, 28},
    {16, 20, 23, 27},
    {15, 19, 22, 25},
    {14, 18, 21, 24},
    {14, 17, 20, 23},
    {13, 16, 19, 22},
    {12, 15, 18, 21},
    {12, 14, 17, 20},
    {11, 14, 16, 19},
    {11, 13, 15, 18},
    {10, 12, 15, 17},
    {10, 12, 14, 16},
    {9, 11, 13, 15},
    {9, 11, 12, 14},
    {8, 10, 12, 14},
    {8, 9, 11, 13},
    {7, 9, 11, 12},
    {7, 9, 10, 12},
    {7, 8, 10, 11},
    {6, 8, 9, 11},
    {6, 7, 9, 10},
    {6, 7, 8, 9},
    {2, 2, 2, 2}
};
  99
/*
 * 64-entry state table: entries 0..61 hold index + 1, entry 62 holds 62
 * and entry 63 holds 63.
 * NOTE(review): presumably the next CABAC state after decoding the most
 * probable symbol, indexed by the current state -- confirm with the
 * cabacdecbin implementation.
 */
const uint8_t AC_next_state_MPS_64[64] = {
    1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
    21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
    31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
    51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
    61, 62, 62, 63
};
 109
 110
/*
 * 64-entry state table: every entry is less than or equal to its own
 * index, and entry 63 holds 63.
 * NOTE(review): presumably the next CABAC state after decoding the least
 * probable symbol, indexed by the current state -- confirm with the
 * cabacdecbin implementation.
 */
const uint8_t AC_next_state_LPS_64[64] = {
    0, 0, 1, 2, 2, 4, 4, 5, 6, 7,
    8, 9, 9, 11, 11, 12, 13, 13, 15, 15,
    16, 16, 18, 18, 19, 19, 21, 21, 22, 22,
    23, 24, 24, 25, 26, 26, 27, 27, 28, 29,
    29, 30, 30, 30, 31, 32, 32, 33, 33, 33,
    34, 34, 35, 35, 35, 36, 36, 36, 37, 37,
    37, 38, 38, 63
};
 120
 121#define BITS_MASK_8 0x5555555555555555ULL
 122#define PAIR_MASK_8 0x3333333333333333ULL
 123#define NYBL_MASK_8 0x0f0f0f0f0f0f0f0fULL
 124#define BYTE_MASK_8 0x00ff00ff00ff00ffULL
 125#define HALF_MASK_8 0x0000ffff0000ffffULL
 126#define WORD_MASK_8 0x00000000ffffffffULL
 127
 128uint64_t interleave(uint32_t odd, uint32_t even)
 129{
 130    /* Convert to long long */
 131    uint64_t myodd = odd;
 132    uint64_t myeven = even;
 133    /* First, spread bits out */
 134    myodd = (myodd | (myodd << 16)) & HALF_MASK_8;
 135    myeven = (myeven | (myeven << 16)) & HALF_MASK_8;
 136    myodd = (myodd | (myodd << 8)) & BYTE_MASK_8;
 137    myeven = (myeven | (myeven << 8)) & BYTE_MASK_8;
 138    myodd = (myodd | (myodd << 4)) & NYBL_MASK_8;
 139    myeven = (myeven | (myeven << 4)) & NYBL_MASK_8;
 140    myodd = (myodd | (myodd << 2)) & PAIR_MASK_8;
 141    myeven = (myeven | (myeven << 2)) & PAIR_MASK_8;
 142    myodd = (myodd | (myodd << 1)) & BITS_MASK_8;
 143    myeven = (myeven | (myeven << 1)) & BITS_MASK_8;
 144    /* Now OR together */
 145    return myeven | (myodd << 1);
 146}
 147
 148uint64_t deinterleave(uint64_t src)
 149{
 150    /* Get odd and even bits */
 151    uint64_t myodd = ((src >> 1) & BITS_MASK_8);
 152    uint64_t myeven = (src & BITS_MASK_8);
 153
 154    /* Unspread bits */
 155    myeven = (myeven | (myeven >> 1)) & PAIR_MASK_8;
 156    myodd = (myodd | (myodd >> 1)) & PAIR_MASK_8;
 157    myeven = (myeven | (myeven >> 2)) & NYBL_MASK_8;
 158    myodd = (myodd | (myodd >> 2)) & NYBL_MASK_8;
 159    myeven = (myeven | (myeven >> 4)) & BYTE_MASK_8;
 160    myodd = (myodd | (myodd >> 4)) & BYTE_MASK_8;
 161    myeven = (myeven | (myeven >> 8)) & HALF_MASK_8;
 162    myodd = (myodd | (myodd >> 8)) & HALF_MASK_8;
 163    myeven = (myeven | (myeven >> 16)) & WORD_MASK_8;
 164    myodd = (myodd | (myodd >> 16)) & WORD_MASK_8;
 165
 166    /* Return odd bits in upper half */
 167    return myeven | (myodd << 32);
 168}
 169
 170int32_t conv_round(int32_t a, int n)
 171{
 172    int64_t val;
 173
 174    if (n == 0) {
 175        val = a;
 176    } else if ((a & ((1 << (n - 1)) - 1)) == 0) {    /* N-1..0 all zero? */
 177        /* Add LSB from int part */
 178        val = ((fSE32_64(a)) + (int64_t) (((uint32_t) ((1 << n) & a)) >> 1));
 179    } else {
 180        val = ((fSE32_64(a)) + (1 << (n - 1)));
 181    }
 182
 183    val = val >> n;
 184    return (int32_t)val;
 185}
 186
 187/* Floating Point Stuff */
 188
 189static const FloatRoundMode softfloat_roundingmodes[] = {
 190    float_round_nearest_even,
 191    float_round_to_zero,
 192    float_round_down,
 193    float_round_up,
 194};
 195
 196void arch_fpop_start(CPUHexagonState *env)
 197{
 198    set_float_exception_flags(0, &env->fp_status);
 199    set_float_rounding_mode(
 200        softfloat_roundingmodes[fREAD_REG_FIELD(USR, USR_FPRND)],
 201        &env->fp_status);
 202}
 203
#ifdef CONFIG_USER_ONLY
/*
 * Hexagon Linux kernel only sets the relevant bits in USR (user status
 * register).  The exception isn't raised to user mode, so we don't
 * model it in qemu user mode.
 *
 * NOTE(review): this file defines RAISE_FP_EXCEPTION only when
 * CONFIG_USER_ONLY is set; any other configuration needs a definition
 * from elsewhere.
 */
#define RAISE_FP_EXCEPTION   do {} while (0)
#endif

/*
 * Transfer one softfloat exception flag into USR.
 *
 * FLAG: softfloat flag bit tested against the local variable `flags`,
 *       which must be in scope where the macro is expanded
 * MYF:  suffix of the USR flag field (USR_<MYF>) to set
 * MYE:  suffix of the USR enable field (USR_<MYE>) to consult
 *
 * When FLAG is set and USR_<MYF> is currently 0, the field is set to 1 and,
 * if USR_<MYE> is non-zero, RAISE_FP_EXCEPTION is invoked.  Nothing happens
 * when USR_<MYF> is already set, so the exception path is only taken on the
 * 0 -> 1 transition of the flag field.
 */
#define SOFTFLOAT_TEST_FLAG(FLAG, MYF, MYE) \
    do { \
        if (flags & FLAG) { \
            if (GET_USR_FIELD(USR_##MYF) == 0) { \
                SET_USR_FIELD(USR_##MYF, 1); \
                if (GET_USR_FIELD(USR_##MYE)) { \
                    RAISE_FP_EXCEPTION; \
                } \
            } \
        } \
    } while (0)
 224
 225void arch_fpop_end(CPUHexagonState *env)
 226{
 227    const bool pkt_need_commit = true;
 228    int flags = get_float_exception_flags(&env->fp_status);
 229    if (flags != 0) {
 230        SOFTFLOAT_TEST_FLAG(float_flag_inexact, FPINPF, FPINPE);
 231        SOFTFLOAT_TEST_FLAG(float_flag_divbyzero, FPDBZF, FPDBZE);
 232        SOFTFLOAT_TEST_FLAG(float_flag_invalid, FPINVF, FPINVE);
 233        SOFTFLOAT_TEST_FLAG(float_flag_overflow, FPOVFF, FPOVFE);
 234        SOFTFLOAT_TEST_FLAG(float_flag_underflow, FPUNFF, FPUNFE);
 235    }
 236}
 237
/*
 * Classify and pre-condition the operands of a single-precision
 * reciprocal/divide sequence (numerator *Rs, denominator *Rt).
 *
 * @Rs:        in/out numerator; overwritten with the fixed-up value
 * @Rt:        in/out denominator; overwritten with the fixed-up value
 * @Rd:        out; NaN or 1.0 for the special cases, 0 in the ordinary case
 * @adjust:    out; 0x80 when the operands were rescaled because the
 *             quotient is near underflow, 0x40 when rescaled because it is
 *             near overflow, 0 otherwise
 * @fp_status: softfloat status; receives the invalid / divbyzero flags
 *             raised here and is passed to float32_scalbn()
 *
 * Returns 1 when neither operand is a NaN, an infinity or a zero (the
 * ordinary path), and 0 when one of the special cases below supplied the
 * outputs directly.
 */
int arch_sf_recip_common(float32 *Rs, float32 *Rt, float32 *Rd, int *adjust,
                         float_status *fp_status)
{
    int n_exp;
    int d_exp;
    int ret = 0;
    float32 RsV, RtV, RdV;
    int PeV = 0;
    RsV = *Rs;
    RtV = *Rt;
    /*
     * NaN operands: all three values become float32_nan.  Invalid is
     * raised when bit 22 (the top fraction bit) is clear -- presumably
     * this marks a signaling NaN; with two NaNs it is enough for either
     * one to have the bit clear.
     */
    if (float32_is_any_nan(RsV) && float32_is_any_nan(RtV)) {
        if (extract32(RsV & RtV, 22, 1) == 0) {
            float_raise(float_flag_invalid, fp_status);
        }
        RdV = RsV = RtV = float32_nan;
    } else if (float32_is_any_nan(RsV)) {
        if (extract32(RsV, 22, 1) == 0) {
            float_raise(float_flag_invalid, fp_status);
        }
        RdV = RsV = RtV = float32_nan;
    } else if (float32_is_any_nan(RtV)) {
        /* or put NaN in num/den fixup? */
        if (extract32(RtV, 22, 1) == 0) {
            float_raise(float_flag_invalid, fp_status);
        }
        RdV = RsV = RtV = float32_nan;
    } else if (float32_is_infinity(RsV) && float32_is_infinity(RtV)) {
        /* Inf / Inf: invalid, result is NaN */
        /* or put Inf in num fixup? */
        RdV = RsV = RtV = float32_nan;
        float_raise(float_flag_invalid, fp_status);
    } else if (float32_is_zero(RsV) && float32_is_zero(RtV)) {
        /* 0 / 0: invalid, result is NaN */
        /* or put zero in num fixup? */
        RdV = RsV = RtV = float32_nan;
        float_raise(float_flag_invalid, fp_status);
    } else if (float32_is_zero(RtV)) {
        /* x / 0 with x non-zero: numerator becomes a signed infinity */
        /* or put Inf in num fixup? */
        uint8_t RsV_sign = float32_is_neg(RsV);
        uint8_t RtV_sign = float32_is_neg(RtV);
        /* Check that RsV is NOT infinite before we overwrite it */
        if (!float32_is_infinity(RsV)) {
            float_raise(float_flag_divbyzero, fp_status);
        }
        RsV = infinite_float32(RsV_sign ^ RtV_sign);
        RtV = float32_one;
        RdV = float32_one;
    } else if (float32_is_infinity(RtV)) {
        /* finite / Inf: numerator becomes a zero carrying the XOR of signs */
        RsV = make_float32(0x80000000 & (RsV ^ RtV));
        RtV = float32_one;
        RdV = float32_one;
    } else if (float32_is_zero(RsV)) {
        /* 0 / finite: numerator becomes a zero carrying the XOR of signs */
        /* Does this just work itself out? */
        /* No, 0/Inf causes problems. */
        RsV = make_float32(0x80000000 & (RsV ^ RtV));
        RtV = float32_one;
        RdV = float32_one;
    } else if (float32_is_infinity(RsV)) {
        /* Inf / finite non-zero: numerator becomes a signed infinity */
        uint8_t RsV_sign = float32_is_neg(RsV);
        uint8_t RtV_sign = float32_is_neg(RtV);
        RsV = infinite_float32(RsV_sign ^ RtV_sign);
        RtV = float32_one;
        RdV = float32_one;
    } else {
        PeV = 0x00;
        /* Basic checks passed */
        n_exp = float32_getexp_raw(RsV);
        d_exp = float32_getexp_raw(RtV);
        /*
         * Rescale the operands with float32_scalbn() depending on where
         * the quotient exponent or the individual exponents fall; only
         * the first matching case applies.
         */
        if ((n_exp - d_exp + SF_BIAS) <= SF_MANTBITS) {
            /* Near quotient underflow / inexact Q */
            PeV = 0x80;
            RtV = float32_scalbn(RtV, -64, fp_status);
            RsV = float32_scalbn(RsV, 64, fp_status);
        } else if ((n_exp - d_exp + SF_BIAS) > (SF_MAXEXP - 24)) {
            /* Near quotient overflow */
            PeV = 0x40;
            RtV = float32_scalbn(RtV, 32, fp_status);
            RsV = float32_scalbn(RsV, -32, fp_status);
        } else if (n_exp <= SF_MANTBITS + 2) {
            /* Small numerator exponent: scale both up, quotient unchanged */
            RtV = float32_scalbn(RtV, 64, fp_status);
            RsV = float32_scalbn(RsV, 64, fp_status);
        } else if (d_exp <= 1) {
            /* Very small denominator exponent: scale both up */
            RtV = float32_scalbn(RtV, 32, fp_status);
            RsV = float32_scalbn(RsV, 32, fp_status);
        } else if (d_exp > 252) {
            /* Very large denominator exponent: scale both down */
            RtV = float32_scalbn(RtV, -32, fp_status);
            RsV = float32_scalbn(RsV, -32, fp_status);
        }
        RdV = 0;
        ret = 1;
    }
    /* Write back the (possibly modified) operands and the outputs */
    *Rs = RsV;
    *Rt = RtV;
    *Rd = RdV;
    *adjust = PeV;
    return ret;
}
 333
 334int arch_sf_invsqrt_common(float32 *Rs, float32 *Rd, int *adjust,
 335                           float_status *fp_status)
 336{
 337    float32 RsV, RdV;
 338    int PeV = 0;
 339    int r_exp;
 340    int ret = 0;
 341    RsV = *Rs;
 342    if (float32_is_any_nan(RsV)) {
 343        if (extract32(RsV, 22, 1) == 0) {
 344            float_raise(float_flag_invalid, fp_status);
 345        }
 346        RdV = RsV = float32_nan;
 347    } else if (float32_lt(RsV, float32_zero, fp_status)) {
 348        /* Negative nonzero values are NaN */
 349        float_raise(float_flag_invalid, fp_status);
 350        RsV = float32_nan;
 351        RdV = float32_nan;
 352    } else if (float32_is_infinity(RsV)) {
 353        /* or put Inf in num fixup? */
 354        RsV = infinite_float32(1);
 355        RdV = infinite_float32(1);
 356    } else if (float32_is_zero(RsV)) {
 357        /* or put zero in num fixup? */
 358        RdV = float32_one;
 359    } else {
 360        PeV = 0x00;
 361        /* Basic checks passed */
 362        r_exp = float32_getexp(RsV);
 363        if (r_exp <= 24) {
 364            RsV = float32_scalbn(RsV, 64, fp_status);
 365            PeV = 0xe0;
 366        }
 367        RdV = 0;
 368        ret = 1;
 369    }
 370    *Rs = RsV;
 371    *Rd = RdV;
 372    *adjust = PeV;
 373    return ret;
 374}
 375
/*
 * 128-entry byte table whose values decrease monotonically from 0xfe
 * down to 0x00.
 * NOTE(review): presumably the seed table for the reciprocal estimate,
 * indexed by the top fraction bits of the operand -- confirm against the
 * code that reads it.
 */
const uint8_t recip_lookup_table[128] = {
    0x0fe, 0x0fa, 0x0f6, 0x0f2, 0x0ef, 0x0eb, 0x0e7, 0x0e4,
    0x0e0, 0x0dd, 0x0d9, 0x0d6, 0x0d2, 0x0cf, 0x0cc, 0x0c9,
    0x0c6, 0x0c2, 0x0bf, 0x0bc, 0x0b9, 0x0b6, 0x0b3, 0x0b1,
    0x0ae, 0x0ab, 0x0a8, 0x0a5, 0x0a3, 0x0a0, 0x09d, 0x09b,
    0x098, 0x096, 0x093, 0x091, 0x08e, 0x08c, 0x08a, 0x087,
    0x085, 0x083, 0x080, 0x07e, 0x07c, 0x07a, 0x078, 0x075,
    0x073, 0x071, 0x06f, 0x06d, 0x06b, 0x069, 0x067, 0x065,
    0x063, 0x061, 0x05f, 0x05e, 0x05c, 0x05a, 0x058, 0x056,
    0x054, 0x053, 0x051, 0x04f, 0x04e, 0x04c, 0x04a, 0x049,
    0x047, 0x045, 0x044, 0x042, 0x040, 0x03f, 0x03d, 0x03c,
    0x03a, 0x039, 0x037, 0x036, 0x034, 0x033, 0x032, 0x030,
    0x02f, 0x02d, 0x02c, 0x02b, 0x029, 0x028, 0x027, 0x025,
    0x024, 0x023, 0x021, 0x020, 0x01f, 0x01e, 0x01c, 0x01b,
    0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x013, 0x012,
    0x011, 0x00f, 0x00e, 0x00d, 0x00c, 0x00b, 0x00a, 0x009,
    0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x000,
};
 394
/*
 * 128-entry byte table made of two decreasing runs: entries 0..63 fall
 * from 0x69 to 0x01 and entries 64..127 fall from 0xfe to 0x6b.
 * NOTE(review): presumably the seed table for the inverse square root
 * estimate, with the two halves selected by exponent parity -- confirm
 * against the code that reads it.
 */
const uint8_t invsqrt_lookup_table[128] = {
    0x069, 0x066, 0x063, 0x061, 0x05e, 0x05b, 0x059, 0x057,
    0x054, 0x052, 0x050, 0x04d, 0x04b, 0x049, 0x047, 0x045,
    0x043, 0x041, 0x03f, 0x03d, 0x03b, 0x039, 0x037, 0x036,
    0x034, 0x032, 0x030, 0x02f, 0x02d, 0x02c, 0x02a, 0x028,
    0x027, 0x025, 0x024, 0x022, 0x021, 0x01f, 0x01e, 0x01d,
    0x01b, 0x01a, 0x019, 0x017, 0x016, 0x015, 0x014, 0x012,
    0x011, 0x010, 0x00f, 0x00d, 0x00c, 0x00b, 0x00a, 0x009,
    0x008, 0x007, 0x006, 0x005, 0x004, 0x003, 0x002, 0x001,
    0x0fe, 0x0fa, 0x0f6, 0x0f3, 0x0ef, 0x0eb, 0x0e8, 0x0e4,
    0x0e1, 0x0de, 0x0db, 0x0d7, 0x0d4, 0x0d1, 0x0ce, 0x0cb,
    0x0c9, 0x0c6, 0x0c3, 0x0c0, 0x0be, 0x0bb, 0x0b8, 0x0b6,
    0x0b3, 0x0b1, 0x0af, 0x0ac, 0x0aa, 0x0a8, 0x0a5, 0x0a3,
    0x0a1, 0x09f, 0x09d, 0x09b, 0x099, 0x097, 0x095, 0x093,
    0x091, 0x08f, 0x08d, 0x08b, 0x089, 0x087, 0x086, 0x084,
    0x082, 0x080, 0x07f, 0x07d, 0x07b, 0x07a, 0x078, 0x077,
    0x075, 0x074, 0x072, 0x071, 0x06f, 0x06e, 0x06c, 0x06b,
};
 413