LXR qemu/target-sparc/vis

   1/*
   2 * VIS op helpers
   3 *
   4 *  Copyright (c) 2003-2005 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "cpu.h"
  21#include "helper.h"
  22
  23/* This function uses non-native bit order */
  24#define GET_FIELD(X, FROM, TO)                                  \
  25    ((X) >> (63 - (TO)) & ((1ULL << ((TO) - (FROM) + 1)) - 1))
  26
  27/* This function uses the order in the manuals, i.e. bit 0 is 2^0 */
  28#define GET_FIELD_SP(X, FROM, TO)               \
  29    GET_FIELD(X, 63 - (TO), 63 - (FROM))
  30
  31target_ulong helper_array8(target_ulong pixel_addr, target_ulong cubesize)
  32{
  33    return (GET_FIELD_SP(pixel_addr, 60, 63) << (17 + 2 * cubesize)) |
  34        (GET_FIELD_SP(pixel_addr, 39, 39 + cubesize - 1) << (17 + cubesize)) |
  35        (GET_FIELD_SP(pixel_addr, 17 + cubesize - 1, 17) << 17) |
  36        (GET_FIELD_SP(pixel_addr, 56, 59) << 13) |
  37        (GET_FIELD_SP(pixel_addr, 35, 38) << 9) |
  38        (GET_FIELD_SP(pixel_addr, 13, 16) << 5) |
  39        (((pixel_addr >> 55) & 1) << 4) |
  40        (GET_FIELD_SP(pixel_addr, 33, 34) << 2) |
  41        GET_FIELD_SP(pixel_addr, 11, 12);
  42}
  43
  44#ifdef HOST_WORDS_BIGENDIAN
  45#define VIS_B64(n) b[7 - (n)]
  46#define VIS_W64(n) w[3 - (n)]
  47#define VIS_SW64(n) sw[3 - (n)]
  48#define VIS_L64(n) l[1 - (n)]
  49#define VIS_B32(n) b[3 - (n)]
  50#define VIS_W32(n) w[1 - (n)]
  51#else
  52#define VIS_B64(n) b[n]
  53#define VIS_W64(n) w[n]
  54#define VIS_SW64(n) sw[n]
  55#define VIS_L64(n) l[n]
  56#define VIS_B32(n) b[n]
  57#define VIS_W32(n) w[n]
  58#endif
  59
  60typedef union {
  61    uint8_t b[8];
  62    uint16_t w[4];
  63    int16_t sw[4];
  64    uint32_t l[2];
  65    uint64_t ll;
  66    float64 d;
  67} VIS64;
  68
  69typedef union {
  70    uint8_t b[4];
  71    uint16_t w[2];
  72    uint32_t l;
  73    float32 f;
  74} VIS32;
  75
  76uint64_t helper_fpmerge(uint64_t src1, uint64_t src2)
  77{
  78    VIS64 s, d;
  79
  80    s.ll = src1;
  81    d.ll = src2;
  82
  83    /* Reverse calculation order to handle overlap */
  84    d.VIS_B64(7) = s.VIS_B64(3);
  85    d.VIS_B64(6) = d.VIS_B64(3);
  86    d.VIS_B64(5) = s.VIS_B64(2);
  87    d.VIS_B64(4) = d.VIS_B64(2);
  88    d.VIS_B64(3) = s.VIS_B64(1);
  89    d.VIS_B64(2) = d.VIS_B64(1);
  90    d.VIS_B64(1) = s.VIS_B64(0);
  91    /* d.VIS_B64(0) = d.VIS_B64(0); */
  92
  93    return d.ll;
  94}
  95
  96uint64_t helper_fmul8x16(uint64_t src1, uint64_t src2)
  97{
  98    VIS64 s, d;
  99    uint32_t tmp;
 100
 101    s.ll = src1;
 102    d.ll = src2;
 103
 104#define PMUL(r)                                                 \
 105    tmp = (int32_t)d.VIS_SW64(r) * (int32_t)s.VIS_B64(r);       \
 106    if ((tmp & 0xff) > 0x7f) {                                  \
 107        tmp += 0x100;                                           \
 108    }                                                           \
 109    d.VIS_W64(r) = tmp >> 8;
 110
 111    PMUL(0);
 112    PMUL(1);
 113    PMUL(2);
 114    PMUL(3);
 115#undef PMUL
 116
 117    return d.ll;
 118}
 119
 120uint64_t helper_fmul8x16al(uint64_t src1, uint64_t src2)
 121{
 122    VIS64 s, d;
 123    uint32_t tmp;
 124
 125    s.ll = src1;
 126    d.ll = src2;
 127
 128#define PMUL(r)                                                 \
 129    tmp = (int32_t)d.VIS_SW64(1) * (int32_t)s.VIS_B64(r);       \
 130    if ((tmp & 0xff) > 0x7f) {                                  \
 131        tmp += 0x100;                                           \
 132    }                                                           \
 133    d.VIS_W64(r) = tmp >> 8;
 134
 135    PMUL(0);
 136    PMUL(1);
 137    PMUL(2);
 138    PMUL(3);
 139#undef PMUL
 140
 141    return d.ll;
 142}
 143
 144uint64_t helper_fmul8x16au(uint64_t src1, uint64_t src2)
 145{
 146    VIS64 s, d;
 147    uint32_t tmp;
 148
 149    s.ll = src1;
 150    d.ll = src2;
 151
 152#define PMUL(r)                                                 \
 153    tmp = (int32_t)d.VIS_SW64(0) * (int32_t)s.VIS_B64(r);       \
 154    if ((tmp & 0xff) > 0x7f) {                                  \
 155        tmp += 0x100;                                           \
 156    }                                                           \
 157    d.VIS_W64(r) = tmp >> 8;
 158
 159    PMUL(0);
 160    PMUL(1);
 161    PMUL(2);
 162    PMUL(3);
 163#undef PMUL
 164
 165    return d.ll;
 166}
 167
 168uint64_t helper_fmul8sux16(uint64_t src1, uint64_t src2)
 169{
 170    VIS64 s, d;
 171    uint32_t tmp;
 172
 173    s.ll = src1;
 174    d.ll = src2;
 175
 176#define PMUL(r)                                                         \
 177    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
 178    if ((tmp & 0xff) > 0x7f) {                                          \
 179        tmp += 0x100;                                                   \
 180    }                                                                   \
 181    d.VIS_W64(r) = tmp >> 8;
 182
 183    PMUL(0);
 184    PMUL(1);
 185    PMUL(2);
 186    PMUL(3);
 187#undef PMUL
 188
 189    return d.ll;
 190}
 191
 192uint64_t helper_fmul8ulx16(uint64_t src1, uint64_t src2)
 193{
 194    VIS64 s, d;
 195    uint32_t tmp;
 196
 197    s.ll = src1;
 198    d.ll = src2;
 199
 200#define PMUL(r)                                                         \
 201    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
 202    if ((tmp & 0xff) > 0x7f) {                                          \
 203        tmp += 0x100;                                                   \
 204    }                                                                   \
 205    d.VIS_W64(r) = tmp >> 8;
 206
 207    PMUL(0);
 208    PMUL(1);
 209    PMUL(2);
 210    PMUL(3);
 211#undef PMUL
 212
 213    return d.ll;
 214}
 215
 216uint64_t helper_fmuld8sux16(uint64_t src1, uint64_t src2)
 217{
 218    VIS64 s, d;
 219    uint32_t tmp;
 220
 221    s.ll = src1;
 222    d.ll = src2;
 223
 224#define PMUL(r)                                                         \
 225    tmp = (int32_t)d.VIS_SW64(r) * ((int32_t)s.VIS_SW64(r) >> 8);       \
 226    if ((tmp & 0xff) > 0x7f) {                                          \
 227        tmp += 0x100;                                                   \
 228    }                                                                   \
 229    d.VIS_L64(r) = tmp;
 230
 231    /* Reverse calculation order to handle overlap */
 232    PMUL(1);
 233    PMUL(0);
 234#undef PMUL
 235
 236    return d.ll;
 237}
 238
 239uint64_t helper_fmuld8ulx16(uint64_t src1, uint64_t src2)
 240{
 241    VIS64 s, d;
 242    uint32_t tmp;
 243
 244    s.ll = src1;
 245    d.ll = src2;
 246
 247#define PMUL(r)                                                         \
 248    tmp = (int32_t)d.VIS_SW64(r) * ((uint32_t)s.VIS_B64(r * 2));        \
 249    if ((tmp & 0xff) > 0x7f) {                                          \
 250        tmp += 0x100;                                                   \
 251    }                                                                   \
 252    d.VIS_L64(r) = tmp;
 253
 254    /* Reverse calculation order to handle overlap */
 255    PMUL(1);
 256    PMUL(0);
 257#undef PMUL
 258
 259    return d.ll;
 260}
 261
 262uint64_t helper_fexpand(uint64_t src1, uint64_t src2)
 263{
 264    VIS32 s;
 265    VIS64 d;
 266
 267    s.l = (uint32_t)src1;
 268    d.ll = src2;
 269    d.VIS_W64(0) = s.VIS_B32(0) << 4;
 270    d.VIS_W64(1) = s.VIS_B32(1) << 4;
 271    d.VIS_W64(2) = s.VIS_B32(2) << 4;
 272    d.VIS_W64(3) = s.VIS_B32(3) << 4;
 273
 274    return d.ll;
 275}
 276
 277#define VIS_HELPER(name, F)                             \
 278    uint64_t name##16(uint64_t src1, uint64_t src2)     \
 279    {                                                   \
 280        VIS64 s, d;                                     \
 281                                                        \
 282        s.ll = src1;                                    \
 283        d.ll = src2;                                    \
 284                                                        \
 285        d.VIS_W64(0) = F(d.VIS_W64(0), s.VIS_W64(0));   \
 286        d.VIS_W64(1) = F(d.VIS_W64(1), s.VIS_W64(1));   \
 287        d.VIS_W64(2) = F(d.VIS_W64(2), s.VIS_W64(2));   \
 288        d.VIS_W64(3) = F(d.VIS_W64(3), s.VIS_W64(3));   \
 289                                                        \
 290        return d.ll;                                    \
 291    }                                                   \
 292                                                        \
 293    uint32_t name##16s(uint32_t src1, uint32_t src2)    \
 294    {                                                   \
 295        VIS32 s, d;                                     \
 296                                                        \
 297        s.l = src1;                                     \
 298        d.l = src2;                                     \
 299                                                        \
 300        d.VIS_W32(0) = F(d.VIS_W32(0), s.VIS_W32(0));   \
 301        d.VIS_W32(1) = F(d.VIS_W32(1), s.VIS_W32(1));   \
 302                                                        \
 303        return d.l;                                     \
 304    }                                                   \
 305                                                        \
 306    uint64_t name##32(uint64_t src1, uint64_t src2)     \
 307    {                                                   \
 308        VIS64 s, d;                                     \
 309                                                        \
 310        s.ll = src1;                                    \
 311        d.ll = src2;                                    \
 312                                                        \
 313        d.VIS_L64(0) = F(d.VIS_L64(0), s.VIS_L64(0));   \
 314        d.VIS_L64(1) = F(d.VIS_L64(1), s.VIS_L64(1));   \
 315                                                        \
 316        return d.ll;                                    \
 317    }                                                   \
 318                                                        \
 319    uint32_t name##32s(uint32_t src1, uint32_t src2)    \
 320    {                                                   \
 321        VIS32 s, d;                                     \
 322                                                        \
 323        s.l = src1;                                     \
 324        d.l = src2;                                     \
 325                                                        \
 326        d.l = F(d.l, s.l);                              \
 327                                                        \
 328        return d.l;                                     \
 329    }
 330
 331#define FADD(a, b) ((a) + (b))
 332#define FSUB(a, b) ((a) - (b))
 333VIS_HELPER(helper_fpadd, FADD)
 334VIS_HELPER(helper_fpsub, FSUB)
 335
 336#define VIS_CMPHELPER(name, F)                                    \
 337    uint64_t name##16(uint64_t src1, uint64_t src2)               \
 338    {                                                             \
 339        VIS64 s, d;                                               \
 340                                                                  \
 341        s.ll = src1;                                              \
 342        d.ll = src2;                                              \
 343                                                                  \
 344        d.VIS_W64(0) = F(s.VIS_W64(0), d.VIS_W64(0)) ? 1 : 0;     \
 345        d.VIS_W64(0) |= F(s.VIS_W64(1), d.VIS_W64(1)) ? 2 : 0;    \
 346        d.VIS_W64(0) |= F(s.VIS_W64(2), d.VIS_W64(2)) ? 4 : 0;    \
 347        d.VIS_W64(0) |= F(s.VIS_W64(3), d.VIS_W64(3)) ? 8 : 0;    \
 348        d.VIS_W64(1) = d.VIS_W64(2) = d.VIS_W64(3) = 0;           \
 349                                                                  \
 350        return d.ll;                                              \
 351    }                                                             \
 352                                                                  \
 353    uint64_t name##32(uint64_t src1, uint64_t src2)               \
 354    {                                                             \
 355        VIS64 s, d;                                               \
 356                                                                  \
 357        s.ll = src1;                                              \
 358        d.ll = src2;                                              \
 359                                                                  \
 360        d.VIS_L64(0) = F(s.VIS_L64(0), d.VIS_L64(0)) ? 1 : 0;     \
 361        d.VIS_L64(0) |= F(s.VIS_L64(1), d.VIS_L64(1)) ? 2 : 0;    \
 362        d.VIS_L64(1) = 0;                                         \
 363                                                                  \
 364        return d.ll;                                              \
 365    }
 366
 367#define FCMPGT(a, b) ((a) > (b))
 368#define FCMPEQ(a, b) ((a) == (b))
 369#define FCMPLE(a, b) ((a) <= (b))
 370#define FCMPNE(a, b) ((a) != (b))
 371
 372VIS_CMPHELPER(helper_fcmpgt, FCMPGT)
 373VIS_CMPHELPER(helper_fcmpeq, FCMPEQ)
 374VIS_CMPHELPER(helper_fcmple, FCMPLE)
 375VIS_CMPHELPER(helper_fcmpne, FCMPNE)
 376
 377uint64_t helper_pdist(uint64_t sum, uint64_t src1, uint64_t src2)
 378{
 379    int i;
 380    for (i = 0; i < 8; i++) {
 381        int s1, s2;
 382
 383        s1 = (src1 >> (56 - (i * 8))) & 0xff;
 384        s2 = (src2 >> (56 - (i * 8))) & 0xff;
 385
 386        /* Absolute value of difference. */
 387        s1 -= s2;
 388        if (s1 < 0) {
 389            s1 = -s1;
 390        }
 391
 392        sum += s1;
 393    }
 394
 395    return sum;
 396}
 397
 398uint32_t helper_fpack16(uint64_t gsr, uint64_t rs2)
 399{
 400    int scale = (gsr >> 3) & 0xf;
 401    uint32_t ret = 0;
 402    int byte;
 403
 404    for (byte = 0; byte < 4; byte++) {
 405        uint32_t val;
 406        int16_t src = rs2 >> (byte * 16);
 407        int32_t scaled = src << scale;
 408        int32_t from_fixed = scaled >> 7;
 409
 410        val = (from_fixed < 0 ?  0 :
 411               from_fixed > 255 ?  255 : from_fixed);
 412
 413        ret |= val << (8 * byte);
 414    }
 415
 416    return ret;
 417}
 418
 419uint64_t helper_fpack32(uint64_t gsr, uint64_t rs1, uint64_t rs2)
 420{
 421    int scale = (gsr >> 3) & 0x1f;
 422    uint64_t ret = 0;
 423    int word;
 424
 425    ret = (rs1 << 8) & ~(0x000000ff000000ffULL);
 426    for (word = 0; word < 2; word++) {
 427        uint64_t val;
 428        int32_t src = rs2 >> (word * 32);
 429        int64_t scaled = (int64_t)src << scale;
 430        int64_t from_fixed = scaled >> 23;
 431
 432        val = (from_fixed < 0 ? 0 :
 433               (from_fixed > 255) ? 255 : from_fixed);
 434
 435        ret |= val << (32 * word);
 436    }
 437
 438    return ret;
 439}
 440
 441uint32_t helper_fpackfix(uint64_t gsr, uint64_t rs2)
 442{
 443    int scale = (gsr >> 3) & 0x1f;
 444    uint32_t ret = 0;
 445    int word;
 446
 447    for (word = 0; word < 2; word++) {
 448        uint32_t val;
 449        int32_t src = rs2 >> (word * 32);
 450        int64_t scaled = src << scale;
 451        int64_t from_fixed = scaled >> 16;
 452
 453        val = (from_fixed < -32768 ? -32768 :
 454               from_fixed > 32767 ?  32767 : from_fixed);
 455
 456        ret |= (val & 0xffff) << (word * 16);
 457    }
 458
 459    return ret;
 460}
 461
 462uint64_t helper_bshuffle(uint64_t gsr, uint64_t src1, uint64_t src2)
 463{
 464    union {
 465        uint64_t ll[2];
 466        uint8_t b[16];
 467    } s;
 468    VIS64 r;
 469    uint32_t i, mask, host;
 470
 471    /* Set up S such that we can index across all of the bytes.  */
 472#ifdef HOST_WORDS_BIGENDIAN
 473    s.ll[0] = src1;
 474    s.ll[1] = src2;
 475    host = 0;
 476#else
 477    s.ll[1] = src1;
 478    s.ll[0] = src2;
 479    host = 15;
 480#endif
 481    mask = gsr >> 32;
 482
 483    for (i = 0; i < 8; ++i) {
 484        unsigned e = (mask >> (28 - i*4)) & 0xf;
 485        r.VIS_B64(i) = s.b[e ^ host];
 486    }
 487
 488    return r.ll;
 489}
 490