qemu/target/sparc/vis_helper.c
/*
 * VIS op helpers
 *
 *  Copyright (c) 2003-2005 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/helper-proto.h"

/* This macro uses non-native bit order */
#define GET_FIELD(X, FROM, TO)                                  \
    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))

/* This macro uses the order in the manuals, i.e. bit 0 is 2^0 */
#define GET_FIELD_SP(X, FROM, TO)               \
    GET_FIELD(X, 63 - (TO), 63 - (FROM))

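/*
 * ARRAY8: turn the 3-D fixed-point coordinates packed in pixel_addr into a
 * blocked (bit-interleaved) byte address, so that neighbouring elements end
 * up close together in memory; cubesize widens the coordinate fields.
 */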
target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
{
    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
        (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
        (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
        (((pixel_addr >> 55) & 1) << 4) |
        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
        GET_FIELD_SP(pixel_addr, 11, 12);
}

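/*
 * The VIS_* accessors below index the byte/word/long sub-elements of a
 * 64-bit or 32-bit value so that index 0 is always the least significant
 * lane, regardless of host byte order.
 */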
#ifdef HOST_WORDS_BIGENDIAN
#define VIS_B64(n) b[7 - (n)]
#define VIS_W64(n) w[3 - (n)]
#define VIS_SW64(n) sw[3 - (n)]
#define VIS_L64(n) l[1 - (n)]
#define VIS_B32(n) b[3 - (n)]
#define VIS_W32(n) w[1 - (n)]
#else
#define VIS_B64(n) b[n]
#define VIS_W64(n) w[n]
#define VIS_SW64(n) sw[n]
#define VIS_L64(n) l[n]
#define VIS_B32(n) b[n]
#define VIS_W32(n) w[n]
#endif

typedef union {
    uint8_t b[8];
    uint16_t w[4];
    int16_t sw[4];
    uint32_t l[2];
    uint64_t ll;
    float64 d;
} VIS64;

typedef union {
    uint8_t b[4];
    uint16_t w[2];
    uint32_t l;
    float32 f;
} VIS32;

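/*
 * FPMERGE: interleave the four low-order bytes of src1 with the four
 * low-order bytes of src2 into eight alternating result bytes.
 */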
uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;

    s.ll = src1;
    d.ll = src2;

    /* Reverse calculation order to handle overlap */
    d.VIS_B64(7) = s.VIS_B64(3);
    d.VIS_B64(6) = d.VIS_B64(3);
    d.VIS_B64(5) = s.VIS_B64(2);
    d.VIS_B64(4) = d.VIS_B64(2);
    d.VIS_B64(3) = s.VIS_B64(1);
    d.VIS_B64(2) = d.VIS_B64(1);
    d.VIS_B64(1) = s.VIS_B64(0);
    /* d.VIS_B64(0) = d.VIS_B64(0); */

    return d.ll;
}

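/*
 * FMUL8x16: multiply the four unsigned 8-bit elements in the low half of
 * src1 by the corresponding signed 16-bit elements of src2; each product
 * is rounded and its upper 16 bits are kept.
 */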
uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);       \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

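/*
 * FMUL8x16AL: as FMUL8x16, but every 8-bit element of src1 is multiplied
 * by the single 16-bit element of src2 selected below (lane 1).
 */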
uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);       \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

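/*
 * FMUL8x16AU: same as above, using 16-bit lane 0 of src2 as the common
 * multiplier.
 */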
uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                 \
    tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
    if ((tmp & 0xff) > 0x7f) {                                  \
        tmp += 0x100;                                           \
    }                                                           \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

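/*
 * FMUL8SUx16: multiply the signed upper 8 bits of each 16-bit element of
 * src1 by the corresponding signed 16-bit element of src2, keeping the
 * rounded upper 16 bits of each product.
 */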
uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                         \
    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
    if ((tmp & 0xff) > 0x7f) {                                          \
        tmp += 0x100;                                                   \
    }                                                                   \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

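/*
 * FMUL8ULx16: multiply the unsigned lower 8 bits of each 16-bit element of
 * src1 by the corresponding signed 16-bit element of src2, keeping the
 * rounded upper 16 bits of each product.
 */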
uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                         \
    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
    if ((tmp & 0xff) > 0x7f) {                                          \
        tmp += 0x100;                                                   \
    }                                                                   \
    d.VIS_W64(r) = tmp >> 8;

    PMUL(0);
    PMUL(1);
    PMUL(2);
    PMUL(3);
#undef PMUL

    return d.ll;
}

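/*
 * FMULD8SUx16: like FMUL8SUx16, but only the two low 16-bit elements take
 * part and each rounded product is stored as a full 32-bit result.
 */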
uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                         \
    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
    if ((tmp & 0xff) > 0x7f) {                                          \
        tmp += 0x100;                                                   \
    }                                                                   \
    d.VIS_L64(r) = tmp;

    /* Reverse calculation order to handle overlap */
    PMUL(1);
    PMUL(0);
#undef PMUL

    return d.ll;
}

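/*
 * FMULD8ULx16: as above, using the unsigned lower 8 bits of each of the
 * two low 16-bit elements of src1.
 */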
uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
{
    VIS64 s, d;
    uint32_t tmp;

    s.ll = src1;
    d.ll = src2;

#define PMUL(r)                                                         \
    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
    if ((tmp & 0xff) > 0x7f) {                                          \
        tmp += 0x100;                                                   \
    }                                                                   \
    d.VIS_L64(r) = tmp;

    /* Reverse calculation order to handle overlap */
    PMUL(1);
    PMUL(0);
#undef PMUL

    return d.ll;
}

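/*
 * FEXPAND: convert the four unsigned 8-bit elements in the low 32 bits of
 * src1 into 16-bit fixed-point values by shifting each left by four.
 */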
uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
{
    VIS32 s;
    VIS64 d;

    s.l = (uint32_t)src1;
    d.ll = src2;
    d.VIS_W64(0) = s.VIS_B32(0) << 4;
    d.VIS_W64(1) = s.VIS_B32(1) << 4;
    d.VIS_W64(2) = s.VIS_B32(2) << 4;
    d.VIS_W64(3) = s.VIS_B32(3) << 4;

    return d.ll;
}

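/*
 * Partitioned add/subtract.  VIS_HELPER expands into four helpers per
 * operation: four 16-bit lanes of a 64-bit value, two 16-bit lanes of a
 * 32-bit value, two 32-bit lanes, and a single 32-bit lane.  It is
 * instantiated below for FPADD and FPSUB.
 */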
#define VIS_HELPER(name, F)                             \
    uint64_t name##16(uint64_t src1, uint64_t src2)     \
    {                                                   \
        VIS64 s, d;                                     \
                                                        \
        s.ll = src1;                                    \
        d.ll = src2;                                    \
                                                        \
        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));   \
        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));   \
        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));   \
        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));   \
                                                        \
        return d.ll;                                    \
    }                                                   \
                                                        \
    uint32_t name##16s(uint32_t src1, uint32_t src2)    \
    {                                                   \
        VIS32 s, d;                                     \
                                                        \
        s.l = src1;                                     \
        d.l = src2;                                     \
                                                        \
        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));   \
        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));   \
                                                        \
        return d.l;                                     \
    }                                                   \
                                                        \
    uint64_t name##32(uint64_t src1, uint64_t src2)     \
    {                                                   \
        VIS64 s, d;                                     \
                                                        \
        s.ll = src1;                                    \
        d.ll = src2;                                    \
                                                        \
        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));   \
        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));   \
                                                        \
        return d.ll;                                    \
    }                                                   \
                                                        \
    uint32_t name##32s(uint32_t src1, uint32_t src2)    \
    {                                                   \
        VIS32 s, d;                                     \
                                                        \
        s.l = src1;                                     \
        d.l = src2;                                     \
                                                        \
        d.l = F(d.l, s.l);                              \
                                                        \
        return d.l;                                     \
    }

#define FADD(a, b) ((a) + (b))
#define FSUB(a, b) ((a) - (b))
VIS_HELPER(helper_fpadd, FADD)
VIS_HELPER(helper_fpsub, FSUB)

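/*
 * Partitioned compares.  Each lane of src1 is compared with the matching
 * lane of src2 and the outcomes are collected as a bit mask in the low
 * bits of the result; the remaining lanes are cleared.
 */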
#define VIS_CMPHELPER(name, F)                                    \
    uint64_t name##16(uint64_t src1, uint64_t src2)               \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.ll = src1;                                              \
        d.ll = src2;                                              \
                                                                  \
        d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
        d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
        d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
        d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
        d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
                                                                  \
        return d.ll;                                              \
    }                                                             \
                                                                  \
    uint64_t name##32(uint64_t src1, uint64_t src2)               \
    {                                                             \
        VIS64 s, d;                                               \
                                                                  \
        s.ll = src1;                                              \
        d.ll = src2;                                              \
                                                                  \
        d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
        d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
        d.VIS_L64(1) = 0;                                         \
                                                                  \
        return d.ll;                                              \
    }

#define FCMPGT(a, b) ((a) > (b))
#define FCMPEQ(a, b) ((a) == (b))
#define FCMPLE(a, b) ((a) <= (b))
#define FCMPNE(a, b) ((a) != (b))

VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
VIS_CMPHELPER(helper_fcmple, FCMPLE)
VIS_CMPHELPER(helper_fcmpne, FCMPNE)

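/*
 * PDIST: add the sum of absolute differences of the eight byte pairs of
 * src1 and src2 to the running total in sum.
 */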
uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
{
    int i;
    for (i = 0; i < 8; i++) {
        int s1, s2;

        s1 = (src1 >> (56 - (i * 8))) & 0xff;
        s2 = (src2 >> (56 - (i * 8))) & 0xff;

        /* Absolute value of difference. */
        s1 -= s2;
        if (s1 < 0) {
            s1 = -s1;
        }

        sum += s1;
    }

    return sum;
}

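/*
 * FPACK16: scale each signed 16-bit element of rs2 left by the GSR scale
 * factor, drop the fraction bits and clamp to 0..255, packing the four
 * results into a 32-bit word.
 */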
uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0xf;
    uint32_t ret = 0;
    int byte;

    for (byte = 0; byte < 4; byte++) {
        uint32_t val;
        int16_t src = rs2 >> (byte * 16);
        int32_t scaled = src << scale;
        int32_t from_fixed = scaled >> 7;

        val = (from_fixed < 0 ?  0 :
               from_fixed > 255 ?  255 : from_fixed);

        ret |= val << (8 * byte);
    }

    return ret;
}

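/*
 * FPACK32: shift the bytes of rs1 up by one position, then scale and clamp
 * each 32-bit element of rs2 to 0..255 and insert the two results into the
 * freed byte lanes.
 */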
uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint64_t ret = 0;
    int word;

    ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
    for (word = 0; word < 2; word++) {
        uint64_t val;
        int32_t src = rs2 >> (word * 32);
        int64_t scaled = (int64_t)src << scale;
        int64_t from_fixed = scaled >> 23;

        val = (from_fixed < 0 ? 0 :
               (from_fixed > 255) ? 255 : from_fixed);

        ret |= val << (32 * word);
    }

    return ret;
}

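/*
 * FPACKFIX: scale each signed 32-bit element of rs2 by the GSR scale
 * factor, drop the fraction bits and clamp to the signed 16-bit range,
 * packing the two results into a 32-bit word.
 */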
uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
{
    int scale = (gsr >> 3) & 0x1f;
    uint32_t ret = 0;
    int word;

    for (word = 0; word < 2; word++) {
        uint32_t val;
        int32_t src = rs2 >> (word * 32);
        int64_t scaled = (int64_t)src << scale;
        int64_t from_fixed = scaled >> 16;

        val = (from_fixed < -32768 ? -32768 :
               from_fixed > 32767 ?  32767 : from_fixed);

        ret |= (val & 0xffff) << (word * 16);
    }

    return ret;
}

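/*
 * BSHUFFLE: treat src1:src2 as a 16-byte vector and select eight bytes
 * from it, one per 4-bit index taken from the mask in the upper 32 bits
 * of gsr.
 */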
uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
{
    union {
        uint64_t ll[2];
        uint8_t b[16];
    } s;
    VIS64 r;
    uint32_t i, mask, host;

    /* Set up S such that we can index across all of the bytes.  */
#ifdef HOST_WORDS_BIGENDIAN
    s.ll[0] = src1;
    s.ll[1] = src2;
    host = 0;
#else
    s.ll[1] = src1;
    s.ll[0] = src2;
    host = 15;
#endif
    mask = gsr >> 32;

    for (i = 0; i < 8; ++i) {
        unsigned e = (mask >> (28 - i*4)) & 0xf;
        /* Mask nibbles are consumed most significant first, so fill the
           result from its most significant byte downwards.  */
        r.VIS_B64(7 - i) = s.b[e ^ host];
    }

    return r.ll;
}