qemu/target-ppc/int_helper.c
<<
>>
Prefs
   1/*
   2 *  PowerPC integer and vector emulation helpers for QEMU.
   3 *
   4 *  Copyright (c) 2003-2007 Jocelyn Mayer
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20#include "cpu.h"
  21#include "exec/exec-all.h"
  22#include "qemu/host-utils.h"
  23#include "exec/helper-proto.h"
  24#include "crypto/aes.h"
  25
  26#include "helper_regs.h"
  27/*****************************************************************************/
  28/* Fixed point operations helpers */
  29
  30target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
  31                           uint32_t oe)
  32{
  33    uint64_t rt = 0;
  34    int overflow = 0;
  35
  36    uint64_t dividend = (uint64_t)ra << 32;
  37    uint64_t divisor = (uint32_t)rb;
  38
  39    if (unlikely(divisor == 0)) {
  40        overflow = 1;
  41    } else {
  42        rt = dividend / divisor;
  43        overflow = rt > UINT32_MAX;
  44    }
  45
  46    if (unlikely(overflow)) {
  47        rt = 0; /* Undefined */
  48    }
  49
  50    if (oe) {
  51        if (unlikely(overflow)) {
  52            env->so = env->ov = 1;
  53        } else {
  54            env->ov = 0;
  55        }
  56    }
  57
  58    return (target_ulong)rt;
  59}
  60
  61target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
  62                          uint32_t oe)
  63{
  64    int64_t rt = 0;
  65    int overflow = 0;
  66
  67    int64_t dividend = (int64_t)ra << 32;
  68    int64_t divisor = (int64_t)((int32_t)rb);
  69
  70    if (unlikely((divisor == 0) ||
  71                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
  72        overflow = 1;
  73    } else {
  74        rt = dividend / divisor;
  75        overflow = rt != (int32_t)rt;
  76    }
  77
  78    if (unlikely(overflow)) {
  79        rt = 0; /* Undefined */
  80    }
  81
  82    if (oe) {
  83        if (unlikely(overflow)) {
  84            env->so = env->ov = 1;
  85        } else {
  86            env->ov = 0;
  87        }
  88    }
  89
  90    return (target_ulong)rt;
  91}
  92
  93#if defined(TARGET_PPC64)
  94
  95uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
  96{
  97    uint64_t rt = 0;
  98    int overflow = 0;
  99
 100    overflow = divu128(&rt, &ra, rb);
 101
 102    if (unlikely(overflow)) {
 103        rt = 0; /* Undefined */
 104    }
 105
 106    if (oe) {
 107        if (unlikely(overflow)) {
 108            env->so = env->ov = 1;
 109        } else {
 110            env->ov = 0;
 111        }
 112    }
 113
 114    return rt;
 115}
 116
 117uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
 118{
 119    int64_t rt = 0;
 120    int64_t ra = (int64_t)rau;
 121    int64_t rb = (int64_t)rbu;
 122    int overflow = divs128(&rt, &ra, rb);
 123
 124    if (unlikely(overflow)) {
 125        rt = 0; /* Undefined */
 126    }
 127
 128    if (oe) {
 129
 130        if (unlikely(overflow)) {
 131            env->so = env->ov = 1;
 132        } else {
 133            env->ov = 0;
 134        }
 135    }
 136
 137    return rt;
 138}
 139
 140#endif
 141
 142
 143target_ulong helper_cntlzw(target_ulong t)
 144{
 145    return clz32(t);
 146}
 147
 148#if defined(TARGET_PPC64)
 149target_ulong helper_cntlzd(target_ulong t)
 150{
 151    return clz64(t);
 152}
 153#endif
 154
 155#if defined(TARGET_PPC64)
 156
 157uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
 158{
 159    int i;
 160    uint64_t ra = 0;
 161
 162    for (i = 0; i < 8; i++) {
 163        int index = (rs >> (i*8)) & 0xFF;
 164        if (index < 64) {
 165            if (rb & (1ull << (63-index))) {
 166                ra |= 1 << i;
 167            }
 168        }
 169    }
 170    return ra;
 171}
 172
 173#endif
 174
 175target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
 176{
 177    target_ulong mask = 0xff;
 178    target_ulong ra = 0;
 179    int i;
 180
 181    for (i = 0; i < sizeof(target_ulong); i++) {
 182        if ((rs & mask) == (rb & mask)) {
 183            ra |= mask;
 184        }
 185        mask <<= 8;
 186    }
 187    return ra;
 188}
 189
 190/* shift right arithmetic helper */
 191target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
 192                         target_ulong shift)
 193{
 194    int32_t ret;
 195
 196    if (likely(!(shift & 0x20))) {
 197        if (likely((uint32_t)shift != 0)) {
 198            shift &= 0x1f;
 199            ret = (int32_t)value >> shift;
 200            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
 201                env->ca = 0;
 202            } else {
 203                env->ca = 1;
 204            }
 205        } else {
 206            ret = (int32_t)value;
 207            env->ca = 0;
 208        }
 209    } else {
 210        ret = (int32_t)value >> 31;
 211        env->ca = (ret != 0);
 212    }
 213    return (target_long)ret;
 214}
 215
 216#if defined(TARGET_PPC64)
 217target_ulong helper_srad(CPUPPCState *env, target_ulong value,
 218                         target_ulong shift)
 219{
 220    int64_t ret;
 221
 222    if (likely(!(shift & 0x40))) {
 223        if (likely((uint64_t)shift != 0)) {
 224            shift &= 0x3f;
 225            ret = (int64_t)value >> shift;
 226            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
 227                env->ca = 0;
 228            } else {
 229                env->ca = 1;
 230            }
 231        } else {
 232            ret = (int64_t)value;
 233            env->ca = 0;
 234        }
 235    } else {
 236        ret = (int64_t)value >> 63;
 237        env->ca = (ret != 0);
 238    }
 239    return ret;
 240}
 241#endif
 242
 243#if defined(TARGET_PPC64)
 244target_ulong helper_popcntb(target_ulong val)
 245{
 246    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
 247                                           0x5555555555555555ULL);
 248    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
 249                                           0x3333333333333333ULL);
 250    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
 251                                           0x0f0f0f0f0f0f0f0fULL);
 252    return val;
 253}
 254
 255target_ulong helper_popcntw(target_ulong val)
 256{
 257    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
 258                                           0x5555555555555555ULL);
 259    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
 260                                           0x3333333333333333ULL);
 261    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
 262                                           0x0f0f0f0f0f0f0f0fULL);
 263    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
 264                                           0x00ff00ff00ff00ffULL);
 265    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
 266                                           0x0000ffff0000ffffULL);
 267    return val;
 268}
 269
 270target_ulong helper_popcntd(target_ulong val)
 271{
 272    return ctpop64(val);
 273}
 274#else
 275target_ulong helper_popcntb(target_ulong val)
 276{
 277    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
 278    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
 279    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
 280    return val;
 281}
 282
 283target_ulong helper_popcntw(target_ulong val)
 284{
 285    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
 286    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
 287    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
 288    val = (val & 0x00ff00ff) + ((val >>  8) & 0x00ff00ff);
 289    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
 290    return val;
 291}
 292#endif
 293
 294/*****************************************************************************/
 295/* PowerPC 601 specific instructions (POWER bridge) */
 296target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
 297{
 298    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
 299
 300    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
 301        (int32_t)arg2 == 0) {
 302        env->spr[SPR_MQ] = 0;
 303        return INT32_MIN;
 304    } else {
 305        env->spr[SPR_MQ] = tmp % arg2;
 306        return  tmp / (int32_t)arg2;
 307    }
 308}
 309
 310target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
 311                         target_ulong arg2)
 312{
 313    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
 314
 315    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
 316        (int32_t)arg2 == 0) {
 317        env->so = env->ov = 1;
 318        env->spr[SPR_MQ] = 0;
 319        return INT32_MIN;
 320    } else {
 321        env->spr[SPR_MQ] = tmp % arg2;
 322        tmp /= (int32_t)arg2;
 323        if ((int32_t)tmp != tmp) {
 324            env->so = env->ov = 1;
 325        } else {
 326            env->ov = 0;
 327        }
 328        return tmp;
 329    }
 330}
 331
 332target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
 333                         target_ulong arg2)
 334{
 335    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
 336        (int32_t)arg2 == 0) {
 337        env->spr[SPR_MQ] = 0;
 338        return INT32_MIN;
 339    } else {
 340        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
 341        return (int32_t)arg1 / (int32_t)arg2;
 342    }
 343}
 344
 345target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
 346                          target_ulong arg2)
 347{
 348    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
 349        (int32_t)arg2 == 0) {
 350        env->so = env->ov = 1;
 351        env->spr[SPR_MQ] = 0;
 352        return INT32_MIN;
 353    } else {
 354        env->ov = 0;
 355        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
 356        return (int32_t)arg1 / (int32_t)arg2;
 357    }
 358}
 359
 360/*****************************************************************************/
 361/* 602 specific instructions */
 362/* mfrom is the most crazy instruction ever seen, imho ! */
 363/* Real implementation uses a ROM table. Do the same */
/* Extremely decomposed:
 *
 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
 368#if !defined(CONFIG_USER_ONLY)
 369target_ulong helper_602_mfrom(target_ulong arg)
 370{
 371    if (likely(arg < 602)) {
 372#include "mfrom_table.c"
 373        return mfrom_ROM_table[arg];
 374    } else {
 375        return 0;
 376    }
 377}
 378#endif
 379
 380/*****************************************************************************/
 381/* Altivec extension helpers */
/*
 * Host-endianness dependent indices and member accessors:
 *  - HI_IDX / LO_IDX are 0 / 1 on big-endian hosts and 1 / 0 on
 *    little-endian hosts.
 *  - AVRB(i) / AVRW(i) expand to the u8 / u32 member indexed by i on
 *    big-endian hosts, and by 15 - i / 3 - i on little-endian hosts.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#else
#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15-(i)]
#define AVRW(i) u32[3-(i)]
#endif
 393
/*
 * Loop header that walks every index of r->element: ascending from 0 on
 * big-endian hosts, descending from the last index on little-endian hosts.
 * It refers to a variable named "r" in the enclosing scope.  The
 * little-endian form terminates on "index >= 0", so index must be a signed
 * variable.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif
 401
 402/* Saturating arithmetic helpers.  */
 403#define SATCVT(from, to, from_type, to_type, min, max)          \
 404    static inline to_type cvt##from##to(from_type x, int *sat)  \
 405    {                                                           \
 406        to_type r;                                              \
 407                                                                \
 408        if (x < (from_type)min) {                               \
 409            r = min;                                            \
 410            *sat = 1;                                           \
 411        } else if (x > (from_type)max) {                        \
 412            r = max;                                            \
 413            *sat = 1;                                           \
 414        } else {                                                \
 415            r = x;                                              \
 416        }                                                       \
 417        return r;                                               \
 418    }
 419#define SATCVTU(from, to, from_type, to_type, min, max)         \
 420    static inline to_type cvt##from##to(from_type x, int *sat)  \
 421    {                                                           \
 422        to_type r;                                              \
 423                                                                \
 424        if (x > (from_type)max) {                               \
 425            r = max;                                            \
 426            *sat = 1;                                           \
 427        } else {                                                \
 428            r = x;                                              \
 429        }                                                       \
 430        return r;                                               \
 431    }
 432SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
 433SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
 434SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
 435
 436SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
 437SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
 438SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
 439SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
 440SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
 441SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
 442#undef SATCVT
 443#undef SATCVTU
 444
 445void helper_lvsl(ppc_avr_t *r, target_ulong sh)
 446{
 447    int i, j = (sh & 0xf);
 448
 449    VECTOR_FOR_INORDER_I(i, u8) {
 450        r->u8[i] = j++;
 451    }
 452}
 453
 454void helper_lvsr(ppc_avr_t *r, target_ulong sh)
 455{
 456    int i, j = 0x10 - (sh & 0xf);
 457
 458    VECTOR_FOR_INORDER_I(i, u8) {
 459        r->u8[i] = j++;
 460    }
 461}
 462
 463void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
 464{
 465#if defined(HOST_WORDS_BIGENDIAN)
 466    env->vscr = r->u32[3];
 467#else
 468    env->vscr = r->u32[0];
 469#endif
 470    set_flush_to_zero(vscr_nj, &env->vec_status);
 471}
 472
 473void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 474{
 475    int i;
 476
 477    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
 478        r->u32[i] = ~a->u32[i] < b->u32[i];
 479    }
 480}
 481
 482#define VARITH_DO(name, op, element)                                    \
 483    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
 484    {                                                                   \
 485        int i;                                                          \
 486                                                                        \
 487        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
 488            r->element[i] = a->element[i] op b->element[i];             \
 489        }                                                               \
 490    }
 491#define VARITH(suffix, element)                 \
 492    VARITH_DO(add##suffix, +, element)          \
 493    VARITH_DO(sub##suffix, -, element)
 494VARITH(ubm, u8)
 495VARITH(uhm, u16)
 496VARITH(uwm, u32)
 497VARITH(udm, u64)
 498VARITH_DO(muluwm, *, u32)
 499#undef VARITH_DO
 500#undef VARITH
 501
 502#define VARITHFP(suffix, func)                                          \
 503    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
 504                          ppc_avr_t *b)                                 \
 505    {                                                                   \
 506        int i;                                                          \
 507                                                                        \
 508        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
 509            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
 510        }                                                               \
 511    }
 512VARITHFP(addfp, float32_add)
 513VARITHFP(subfp, float32_sub)
 514VARITHFP(minfp, float32_min)
 515VARITHFP(maxfp, float32_max)
 516#undef VARITHFP
 517
 518#define VARITHFPFMA(suffix, type)                                       \
 519    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
 520                           ppc_avr_t *b, ppc_avr_t *c)                  \
 521    {                                                                   \
 522        int i;                                                          \
 523        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
 524            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
 525                                     type, &env->vec_status);           \
 526        }                                                               \
 527    }
 528VARITHFPFMA(maddfp, 0);
 529VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
 530#undef VARITHFPFMA
 531
 532#define VARITHSAT_CASE(type, op, cvt, element)                          \
 533    {                                                                   \
 534        type result = (type)a->element[i] op (type)b->element[i];       \
 535        r->element[i] = cvt(result, &sat);                              \
 536    }
 537
 538#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
 539    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
 540                        ppc_avr_t *b)                                   \
 541    {                                                                   \
 542        int sat = 0;                                                    \
 543        int i;                                                          \
 544                                                                        \
 545        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
 546            switch (sizeof(r->element[0])) {                            \
 547            case 1:                                                     \
 548                VARITHSAT_CASE(optype, op, cvt, element);               \
 549                break;                                                  \
 550            case 2:                                                     \
 551                VARITHSAT_CASE(optype, op, cvt, element);               \
 552                break;                                                  \
 553            case 4:                                                     \
 554                VARITHSAT_CASE(optype, op, cvt, element);               \
 555                break;                                                  \
 556            }                                                           \
 557        }                                                               \
 558        if (sat) {                                                      \
 559            env->vscr |= (1 << VSCR_SAT);                               \
 560        }                                                               \
 561    }
 562#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
 563    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
 564    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
 565#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
 566    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
 567    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
 568VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
 569VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
 570VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
 571VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
 572VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
 573VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
 574#undef VARITHSAT_CASE
 575#undef VARITHSAT_DO
 576#undef VARITHSAT_SIGNED
 577#undef VARITHSAT_UNSIGNED
 578
 579#define VAVG_DO(name, element, etype)                                   \
 580    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
 581    {                                                                   \
 582        int i;                                                          \
 583                                                                        \
 584        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
 585            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
 586            r->element[i] = x >> 1;                                     \
 587        }                                                               \
 588    }
 589
 590#define VAVG(type, signed_element, signed_type, unsigned_element,       \
 591             unsigned_type)                                             \
 592    VAVG_DO(avgs##type, signed_element, signed_type)                    \
 593    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
 594VAVG(b, s8, int16_t, u8, uint16_t)
 595VAVG(h, s16, int32_t, u16, uint32_t)
 596VAVG(w, s32, int64_t, u32, uint64_t)
 597#undef VAVG_DO
 598#undef VAVG
 599
 600#define VCF(suffix, cvt, element)                                       \
 601    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
 602                            ppc_avr_t *b, uint32_t uim)                 \
 603    {                                                                   \
 604        int i;                                                          \
 605                                                                        \
 606        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
 607            float32 t = cvt(b->element[i], &env->vec_status);           \
 608            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
 609        }                                                               \
 610    }
 611VCF(ux, uint32_to_float32, u32)
 612VCF(sx, int32_to_float32, s32)
 613#undef VCF
 614
 615#define VCMP_DO(suffix, compare, element, record)                       \
 616    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
 617                             ppc_avr_t *a, ppc_avr_t *b)                \
 618    {                                                                   \
 619        uint64_t ones = (uint64_t)-1;                                   \
 620        uint64_t all = ones;                                            \
 621        uint64_t none = 0;                                              \
 622        int i;                                                          \
 623                                                                        \
 624        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
 625            uint64_t result = (a->element[i] compare b->element[i] ?    \
 626                               ones : 0x0);                             \
 627            switch (sizeof(a->element[0])) {                            \
 628            case 8:                                                     \
 629                r->u64[i] = result;                                     \
 630                break;                                                  \
 631            case 4:                                                     \
 632                r->u32[i] = result;                                     \
 633                break;                                                  \
 634            case 2:                                                     \
 635                r->u16[i] = result;                                     \
 636                break;                                                  \
 637            case 1:                                                     \
 638                r->u8[i] = result;                                      \
 639                break;                                                  \
 640            }                                                           \
 641            all &= result;                                              \
 642            none |= result;                                             \
 643        }                                                               \
 644        if (record) {                                                   \
 645            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
 646        }                                                               \
 647    }
 648#define VCMP(suffix, compare, element)          \
 649    VCMP_DO(suffix, compare, element, 0)        \
 650    VCMP_DO(suffix##_dot, compare, element, 1)
 651VCMP(equb, ==, u8)
 652VCMP(equh, ==, u16)
 653VCMP(equw, ==, u32)
 654VCMP(equd, ==, u64)
 655VCMP(gtub, >, u8)
 656VCMP(gtuh, >, u16)
 657VCMP(gtuw, >, u32)
 658VCMP(gtud, >, u64)
 659VCMP(gtsb, >, s8)
 660VCMP(gtsh, >, s16)
 661VCMP(gtsw, >, s32)
 662VCMP(gtsd, >, s64)
 663#undef VCMP_DO
 664#undef VCMP
 665
 666#define VCMPFP_DO(suffix, compare, order, record)                       \
 667    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
 668                             ppc_avr_t *a, ppc_avr_t *b)                \
 669    {                                                                   \
 670        uint32_t ones = (uint32_t)-1;                                   \
 671        uint32_t all = ones;                                            \
 672        uint32_t none = 0;                                              \
 673        int i;                                                          \
 674                                                                        \
 675        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
 676            uint32_t result;                                            \
 677            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
 678                                            &env->vec_status);          \
 679            if (rel == float_relation_unordered) {                      \
 680                result = 0;                                             \
 681            } else if (rel compare order) {                             \
 682                result = ones;                                          \
 683            } else {                                                    \
 684                result = 0;                                             \
 685            }                                                           \
 686            r->u32[i] = result;                                         \
 687            all &= result;                                              \
 688            none |= result;                                             \
 689        }                                                               \
 690        if (record) {                                                   \
 691            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
 692        }                                                               \
 693    }
 694#define VCMPFP(suffix, compare, order)          \
 695    VCMPFP_DO(suffix, compare, order, 0)        \
 696    VCMPFP_DO(suffix##_dot, compare, order, 1)
 697VCMPFP(eqfp, ==, float_relation_equal)
 698VCMPFP(gefp, !=, float_relation_less)
 699VCMPFP(gtfp, ==, float_relation_greater)
 700#undef VCMPFP_DO
 701#undef VCMPFP
 702
 703static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
 704                                    ppc_avr_t *a, ppc_avr_t *b, int record)
 705{
 706    int i;
 707    int all_in = 0;
 708
 709    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
 710        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
 711        if (le_rel == float_relation_unordered) {
 712            r->u32[i] = 0xc0000000;
 713            all_in = 1;
 714        } else {
 715            float32 bneg = float32_chs(b->f[i]);
 716            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
 717            int le = le_rel != float_relation_greater;
 718            int ge = ge_rel != float_relation_less;
 719
 720            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
 721            all_in |= (!le | !ge);
 722        }
 723    }
 724    if (record) {
 725        env->crf[6] = (all_in == 0) << 1;
 726    }
 727}
 728
 729void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 730{
 731    vcmpbfp_internal(env, r, a, b, 0);
 732}
 733
 734void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 735                        ppc_avr_t *b)
 736{
 737    vcmpbfp_internal(env, r, a, b, 1);
 738}
 739
 740#define VCT(suffix, satcvt, element)                                    \
 741    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
 742                            ppc_avr_t *b, uint32_t uim)                 \
 743    {                                                                   \
 744        int i;                                                          \
 745        int sat = 0;                                                    \
 746        float_status s = env->vec_status;                               \
 747                                                                        \
 748        set_float_rounding_mode(float_round_to_zero, &s);               \
 749        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
 750            if (float32_is_any_nan(b->f[i])) {                          \
 751                r->element[i] = 0;                                      \
 752            } else {                                                    \
 753                float64 t = float32_to_float64(b->f[i], &s);            \
 754                int64_t j;                                              \
 755                                                                        \
 756                t = float64_scalbn(t, uim, &s);                         \
 757                j = float64_to_int64(t, &s);                            \
 758                r->element[i] = satcvt(j, &sat);                        \
 759            }                                                           \
 760        }                                                               \
 761        if (sat) {                                                      \
 762            env->vscr |= (1 << VSCR_SAT);                               \
 763        }                                                               \
 764    }
 765VCT(uxs, cvtsduw, u32)
 766VCT(sxs, cvtsdsw, s32)
 767#undef VCT
 768
 769void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 770                      ppc_avr_t *b, ppc_avr_t *c)
 771{
 772    int sat = 0;
 773    int i;
 774
 775    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
 776        int32_t prod = a->s16[i] * b->s16[i];
 777        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
 778
 779        r->s16[i] = cvtswsh(t, &sat);
 780    }
 781
 782    if (sat) {
 783        env->vscr |= (1 << VSCR_SAT);
 784    }
 785}
 786
 787void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 788                       ppc_avr_t *b, ppc_avr_t *c)
 789{
 790    int sat = 0;
 791    int i;
 792
 793    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
 794        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
 795        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
 796        r->s16[i] = cvtswsh(t, &sat);
 797    }
 798
 799    if (sat) {
 800        env->vscr |= (1 << VSCR_SAT);
 801    }
 802}
 803
 804#define VMINMAX_DO(name, compare, element)                              \
 805    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
 806    {                                                                   \
 807        int i;                                                          \
 808                                                                        \
 809        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
 810            if (a->element[i] compare b->element[i]) {                  \
 811                r->element[i] = b->element[i];                          \
 812            } else {                                                    \
 813                r->element[i] = a->element[i];                          \
 814            }                                                           \
 815        }                                                               \
 816    }
 817#define VMINMAX(suffix, element)                \
 818    VMINMAX_DO(min##suffix, >, element)         \
 819    VMINMAX_DO(max##suffix, <, element)
 820VMINMAX(sb, s8)
 821VMINMAX(sh, s16)
 822VMINMAX(sw, s32)
 823VMINMAX(sd, s64)
 824VMINMAX(ub, u8)
 825VMINMAX(uh, u16)
 826VMINMAX(uw, u32)
 827VMINMAX(ud, u64)
 828#undef VMINMAX_DO
 829#undef VMINMAX
 830
 831void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
 832{
 833    int i;
 834
 835    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
 836        int32_t prod = a->s16[i] * b->s16[i];
 837        r->s16[i] = (int16_t) (prod + c->s16[i]);
 838    }
 839}
 840
 841#define VMRG_DO(name, element, highp)                                   \
 842    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
 843    {                                                                   \
 844        ppc_avr_t result;                                               \
 845        int i;                                                          \
 846        size_t n_elems = ARRAY_SIZE(r->element);                        \
 847                                                                        \
 848        for (i = 0; i < n_elems / 2; i++) {                             \
 849            if (highp) {                                                \
 850                result.element[i*2+HI_IDX] = a->element[i];             \
 851                result.element[i*2+LO_IDX] = b->element[i];             \
 852            } else {                                                    \
 853                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
 854                    b->element[n_elems - i - 1];                        \
 855                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
 856                    a->element[n_elems - i - 1];                        \
 857            }                                                           \
 858        }                                                               \
 859        *r = result;                                                    \
 860    }
 861#if defined(HOST_WORDS_BIGENDIAN)
 862#define MRGHI 0
 863#define MRGLO 1
 864#else
 865#define MRGHI 1
 866#define MRGLO 0
 867#endif
 868#define VMRG(suffix, element)                   \
 869    VMRG_DO(mrgl##suffix, element, MRGHI)       \
 870    VMRG_DO(mrgh##suffix, element, MRGLO)
 871VMRG(b, u8)
 872VMRG(h, u16)
 873VMRG(w, u32)
 874#undef VMRG_DO
 875#undef VMRG
 876#undef MRGHI
 877#undef MRGLO
 878
 879void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 880                     ppc_avr_t *b, ppc_avr_t *c)
 881{
 882    int32_t prod[16];
 883    int i;
 884
 885    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
 886        prod[i] = (int32_t)a->s8[i] * b->u8[i];
 887    }
 888
 889    VECTOR_FOR_INORDER_I(i, s32) {
 890        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
 891            prod[4 * i + 2] + prod[4 * i + 3];
 892    }
 893}
 894
 895void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 896                     ppc_avr_t *b, ppc_avr_t *c)
 897{
 898    int32_t prod[8];
 899    int i;
 900
 901    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
 902        prod[i] = a->s16[i] * b->s16[i];
 903    }
 904
 905    VECTOR_FOR_INORDER_I(i, s32) {
 906        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
 907    }
 908}
 909
 910void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 911                     ppc_avr_t *b, ppc_avr_t *c)
 912{
 913    int32_t prod[8];
 914    int i;
 915    int sat = 0;
 916
 917    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
 918        prod[i] = (int32_t)a->s16[i] * b->s16[i];
 919    }
 920
 921    VECTOR_FOR_INORDER_I(i, s32) {
 922        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
 923
 924        r->u32[i] = cvtsdsw(t, &sat);
 925    }
 926
 927    if (sat) {
 928        env->vscr |= (1 << VSCR_SAT);
 929    }
 930}
 931
 932void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 933                     ppc_avr_t *b, ppc_avr_t *c)
 934{
 935    uint16_t prod[16];
 936    int i;
 937
 938    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
 939        prod[i] = a->u8[i] * b->u8[i];
 940    }
 941
 942    VECTOR_FOR_INORDER_I(i, u32) {
 943        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
 944            prod[4 * i + 2] + prod[4 * i + 3];
 945    }
 946}
 947
 948void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 949                     ppc_avr_t *b, ppc_avr_t *c)
 950{
 951    uint32_t prod[8];
 952    int i;
 953
 954    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
 955        prod[i] = a->u16[i] * b->u16[i];
 956    }
 957
 958    VECTOR_FOR_INORDER_I(i, u32) {
 959        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
 960    }
 961}
 962
 963void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 964                     ppc_avr_t *b, ppc_avr_t *c)
 965{
 966    uint32_t prod[8];
 967    int i;
 968    int sat = 0;
 969
 970    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
 971        prod[i] = a->u16[i] * b->u16[i];
 972    }
 973
 974    VECTOR_FOR_INORDER_I(i, s32) {
 975        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
 976
 977        r->u32[i] = cvtuduw(t, &sat);
 978    }
 979
 980    if (sat) {
 981        env->vscr |= (1 << VSCR_SAT);
 982    }
 983}
 984
 985#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
 986    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
 987    {                                                                   \
 988        int i;                                                          \
 989                                                                        \
 990        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
 991            if (evenp) {                                                \
 992                r->prod_element[i] =                                    \
 993                    (cast)a->mul_element[i * 2 + HI_IDX] *              \
 994                    (cast)b->mul_element[i * 2 + HI_IDX];               \
 995            } else {                                                    \
 996                r->prod_element[i] =                                    \
 997                    (cast)a->mul_element[i * 2 + LO_IDX] *              \
 998                    (cast)b->mul_element[i * 2 + LO_IDX];               \
 999            }                                                           \
1000        }                                                               \
1001    }
1002#define VMUL(suffix, mul_element, prod_element, cast)            \
1003    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)    \
1004    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1005VMUL(sb, s8, s16, int16_t)
1006VMUL(sh, s16, s32, int32_t)
1007VMUL(sw, s32, s64, int64_t)
1008VMUL(ub, u8, u16, uint16_t)
1009VMUL(uh, u16, u32, uint32_t)
1010VMUL(uw, u32, u64, uint64_t)
1011#undef VMUL_DO
1012#undef VMUL
1013
1014void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1015                  ppc_avr_t *c)
1016{
1017    ppc_avr_t result;
1018    int i;
1019
1020    VECTOR_FOR_INORDER_I(i, u8) {
1021        int s = c->u8[i] & 0x1f;
1022#if defined(HOST_WORDS_BIGENDIAN)
1023        int index = s & 0xf;
1024#else
1025        int index = 15 - (s & 0xf);
1026#endif
1027
1028        if (s & 0x10) {
1029            result.u8[i] = b->u8[index];
1030        } else {
1031            result.u8[i] = a->u8[index];
1032        }
1033    }
1034    *r = result;
1035}
1036
1037#if defined(HOST_WORDS_BIGENDIAN)
1038#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1039#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1040#else
1041#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1042#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1043#endif
1044
1045void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1046{
1047    int i;
1048    uint64_t perm = 0;
1049
1050    VECTOR_FOR_INORDER_I(i, u8) {
1051        int index = VBPERMQ_INDEX(b, i);
1052
1053        if (index < 128) {
1054            uint64_t mask = (1ull << (63-(index & 0x3F)));
1055            if (a->u64[VBPERMQ_DW(index)] & mask) {
1056                perm |= (0x8000 >> i);
1057            }
1058        }
1059    }
1060
1061    r->u64[HI_IDX] = perm;
1062    r->u64[LO_IDX] = 0;
1063}
1064
1065#undef VBPERMQ_INDEX
1066#undef VBPERMQ_DW
1067
/*
 * VGBBD_MASKS[n] spreads the eight bits of n across the eight bytes of a
 * 64-bit word: bit k of n (k = 0 being the least significant) is placed in
 * the top bit of byte k, i.e. at bit position 8 * k + 7.  For example
 * VGBBD_MASKS[0x03] is 0x0000000000008080.  The 256 entries are generated
 * by the preprocessor instead of being spelled out one by one.
 */
#define VGBBD_BIT(n, k) ((((n) >> (k)) & 1ull) << (8 * (k) + 7))
#define VGBBD_ROW1(n)                                                   \
    (VGBBD_BIT(n, 0) | VGBBD_BIT(n, 1) | VGBBD_BIT(n, 2) |              \
     VGBBD_BIT(n, 3) | VGBBD_BIT(n, 4) | VGBBD_BIT(n, 5) |              \
     VGBBD_BIT(n, 6) | VGBBD_BIT(n, 7))
#define VGBBD_ROW4(n)                                                   \
    VGBBD_ROW1(n), VGBBD_ROW1((n) + 1),                                 \
    VGBBD_ROW1((n) + 2), VGBBD_ROW1((n) + 3)
#define VGBBD_ROW16(n)                                                  \
    VGBBD_ROW4(n), VGBBD_ROW4((n) + 4),                                 \
    VGBBD_ROW4((n) + 8), VGBBD_ROW4((n) + 12)
#define VGBBD_ROW64(n)                                                  \
    VGBBD_ROW16(n), VGBBD_ROW16((n) + 16),                              \
    VGBBD_ROW16((n) + 32), VGBBD_ROW16((n) + 48)

static const uint64_t VGBBD_MASKS[256] = {
    VGBBD_ROW64(0x00), /* 00 .. 3F */
    VGBBD_ROW64(0x40), /* 40 .. 7F */
    VGBBD_ROW64(0x80), /* 80 .. BF */
    VGBBD_ROW64(0xC0), /* C0 .. FF */
};

#undef VGBBD_ROW64
#undef VGBBD_ROW16
#undef VGBBD_ROW4
#undef VGBBD_ROW1
#undef VGBBD_BIT
1326
1327void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1328{
1329    int i;
1330    uint64_t t[2] = { 0, 0 };
1331
1332    VECTOR_FOR_INORDER_I(i, u8) {
1333#if defined(HOST_WORDS_BIGENDIAN)
1334        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1335#else
1336        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1337#endif
1338    }
1339
1340    r->u64[0] = t[0];
1341    r->u64[1] = t[1];
1342}
1343
1344#define PMSUM(name, srcfld, trgfld, trgtyp)                   \
1345void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
1346{                                                             \
1347    int i, j;                                                 \
1348    trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])];      \
1349                                                              \
1350    VECTOR_FOR_INORDER_I(i, srcfld) {                         \
1351        prod[i] = 0;                                          \
1352        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
1353            if (a->srcfld[i] & (1ull<<j)) {                   \
1354                prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
1355            }                                                 \
1356        }                                                     \
1357    }                                                         \
1358                                                              \
1359    VECTOR_FOR_INORDER_I(i, trgfld) {                         \
1360        r->trgfld[i] = prod[2*i] ^ prod[2*i+1];               \
1361    }                                                         \
1362}
1363
1364PMSUM(vpmsumb, u8, u16, uint16_t)
1365PMSUM(vpmsumh, u16, u32, uint32_t)
1366PMSUM(vpmsumw, u32, u64, uint64_t)
1367
1368void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1369{
1370
1371#ifdef CONFIG_INT128
1372    int i, j;
1373    __uint128_t prod[2];
1374
1375    VECTOR_FOR_INORDER_I(i, u64) {
1376        prod[i] = 0;
1377        for (j = 0; j < 64; j++) {
1378            if (a->u64[i] & (1ull<<j)) {
1379                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1380            }
1381        }
1382    }
1383
1384    r->u128 = prod[0] ^ prod[1];
1385
1386#else
1387    int i, j;
1388    ppc_avr_t prod[2];
1389
1390    VECTOR_FOR_INORDER_I(i, u64) {
1391        prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1392        for (j = 0; j < 64; j++) {
1393            if (a->u64[i] & (1ull<<j)) {
1394                ppc_avr_t bshift;
1395                if (j == 0) {
1396                    bshift.u64[HI_IDX] = 0;
1397                    bshift.u64[LO_IDX] = b->u64[i];
1398                } else {
1399                    bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1400                    bshift.u64[LO_IDX] = b->u64[i] << j;
1401                }
1402                prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1403                prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1404            }
1405        }
1406    }
1407
1408    r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1409    r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1410#endif
1411}
1412
1413
1414#if defined(HOST_WORDS_BIGENDIAN)
1415#define PKBIG 1
1416#else
1417#define PKBIG 0
1418#endif
1419void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1420{
1421    int i, j;
1422    ppc_avr_t result;
1423#if defined(HOST_WORDS_BIGENDIAN)
1424    const ppc_avr_t *x[2] = { a, b };
1425#else
1426    const ppc_avr_t *x[2] = { b, a };
1427#endif
1428
1429    VECTOR_FOR_INORDER_I(i, u64) {
1430        VECTOR_FOR_INORDER_I(j, u32) {
1431            uint32_t e = x[i]->u32[j];
1432
1433            result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1434                                 ((e >> 6) & 0x3e0) |
1435                                 ((e >> 3) & 0x1f));
1436        }
1437    }
1438    *r = result;
1439}
1440
1441#define VPK(suffix, from, to, cvt, dosat)                               \
1442    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
1443                            ppc_avr_t *a, ppc_avr_t *b)                 \
1444    {                                                                   \
1445        int i;                                                          \
1446        int sat = 0;                                                    \
1447        ppc_avr_t result;                                               \
1448        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
1449        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
1450                                                                        \
1451        VECTOR_FOR_INORDER_I(i, from) {                                 \
1452            result.to[i] = cvt(a0->from[i], &sat);                      \
1453            result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
1454        }                                                               \
1455        *r = result;                                                    \
1456        if (dosat && sat) {                                             \
1457            env->vscr |= (1 << VSCR_SAT);                               \
1458        }                                                               \
1459    }
1460#define I(x, y) (x)
1461VPK(shss, s16, s8, cvtshsb, 1)
1462VPK(shus, s16, u8, cvtshub, 1)
1463VPK(swss, s32, s16, cvtswsh, 1)
1464VPK(swus, s32, u16, cvtswuh, 1)
1465VPK(sdss, s64, s32, cvtsdsw, 1)
1466VPK(sdus, s64, u32, cvtsduw, 1)
1467VPK(uhus, u16, u8, cvtuhub, 1)
1468VPK(uwus, u32, u16, cvtuwuh, 1)
1469VPK(udus, u64, u32, cvtuduw, 1)
1470VPK(uhum, u16, u8, I, 0)
1471VPK(uwum, u32, u16, I, 0)
1472VPK(udum, u64, u32, I, 0)
1473#undef I
1474#undef VPK
1475#undef PKBIG
1476
1477void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1478{
1479    int i;
1480
1481    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1482        r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1483    }
1484}
1485
1486#define VRFI(suffix, rounding)                                  \
1487    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
1488                             ppc_avr_t *b)                      \
1489    {                                                           \
1490        int i;                                                  \
1491        float_status s = env->vec_status;                       \
1492                                                                \
1493        set_float_rounding_mode(rounding, &s);                  \
1494        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
1495            r->f[i] = float32_round_to_int (b->f[i], &s);       \
1496        }                                                       \
1497    }
1498VRFI(n, float_round_nearest_even)
1499VRFI(m, float_round_down)
1500VRFI(p, float_round_up)
1501VRFI(z, float_round_to_zero)
1502#undef VRFI
1503
1504#define VROTATE(suffix, element, mask)                                  \
1505    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1506    {                                                                   \
1507        int i;                                                          \
1508                                                                        \
1509        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1510            unsigned int shift = b->element[i] & mask;                  \
1511            r->element[i] = (a->element[i] << shift) |                  \
1512                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1513        }                                                               \
1514    }
1515VROTATE(b, u8, 0x7)
1516VROTATE(h, u16, 0xF)
1517VROTATE(w, u32, 0x1F)
1518VROTATE(d, u64, 0x3F)
1519#undef VROTATE
1520
1521void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1522{
1523    int i;
1524
1525    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1526        float32 t = float32_sqrt(b->f[i], &env->vec_status);
1527
1528        r->f[i] = float32_div(float32_one, t, &env->vec_status);
1529    }
1530}
1531
1532void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1533                 ppc_avr_t *c)
1534{
1535    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1536    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1537}
1538
1539void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1540{
1541    int i;
1542
1543    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1544        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1545    }
1546}
1547
1548void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1549{
1550    int i;
1551
1552    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1553        r->f[i] = float32_log2(b->f[i], &env->vec_status);
1554    }
1555}
1556
1557/* The specification says that the results are undefined if all of the
1558 * shift counts are not identical.  We check to make sure that they are
1559 * to conform to what real hardware appears to do.  */
1560#define VSHIFT(suffix, leftp)                                           \
1561    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
1562    {                                                                   \
1563        int shift = b->u8[LO_IDX*15] & 0x7;                             \
1564        int doit = 1;                                                   \
1565        int i;                                                          \
1566                                                                        \
1567        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
1568            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
1569        }                                                               \
1570        if (doit) {                                                     \
1571            if (shift == 0) {                                           \
1572                *r = *a;                                                \
1573            } else if (leftp) {                                         \
1574                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
1575                                                                        \
1576                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
1577                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
1578            } else {                                                    \
1579                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
1580                                                                        \
1581                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
1582                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
1583            }                                                           \
1584        }                                                               \
1585    }
1586VSHIFT(l, 1)
1587VSHIFT(r, 0)
1588#undef VSHIFT
1589
1590#define VSL(suffix, element, mask)                                      \
1591    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1592    {                                                                   \
1593        int i;                                                          \
1594                                                                        \
1595        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1596            unsigned int shift = b->element[i] & mask;                  \
1597                                                                        \
1598            r->element[i] = a->element[i] << shift;                     \
1599        }                                                               \
1600    }
1601VSL(b, u8, 0x7)
1602VSL(h, u16, 0x0F)
1603VSL(w, u32, 0x1F)
1604VSL(d, u64, 0x3F)
1605#undef VSL
1606
1607void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1608{
1609    int sh = shift & 0xf;
1610    int i;
1611    ppc_avr_t result;
1612
1613#if defined(HOST_WORDS_BIGENDIAN)
1614    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1615        int index = sh + i;
1616        if (index > 0xf) {
1617            result.u8[i] = b->u8[index - 0x10];
1618        } else {
1619            result.u8[i] = a->u8[index];
1620        }
1621    }
1622#else
1623    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1624        int index = (16 - sh) + i;
1625        if (index > 0xf) {
1626            result.u8[i] = a->u8[index - 0x10];
1627        } else {
1628            result.u8[i] = b->u8[index];
1629        }
1630    }
1631#endif
1632    *r = result;
1633}
1634
1635void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1636{
1637    int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1638
1639#if defined(HOST_WORDS_BIGENDIAN)
1640    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1641    memset(&r->u8[16-sh], 0, sh);
1642#else
1643    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1644    memset(&r->u8[0], 0, sh);
1645#endif
1646}
1647
1648/* Experimental testing shows that hardware masks the immediate.  */
1649#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1650#if defined(HOST_WORDS_BIGENDIAN)
1651#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1652#else
1653#define SPLAT_ELEMENT(element)                                  \
1654    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1655#endif
1656#define VSPLT(suffix, element)                                          \
1657    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1658    {                                                                   \
1659        uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
1660        int i;                                                          \
1661                                                                        \
1662        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1663            r->element[i] = s;                                          \
1664        }                                                               \
1665    }
1666VSPLT(b, u8)
1667VSPLT(h, u16)
1668VSPLT(w, u32)
1669#undef VSPLT
1670#undef SPLAT_ELEMENT
1671#undef _SPLAT_MASKED
1672
1673#define VSPLTI(suffix, element, splat_type)                     \
1674    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
1675    {                                                           \
1676        splat_type x = (int8_t)(splat << 3) >> 3;               \
1677        int i;                                                  \
1678                                                                \
1679        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
1680            r->element[i] = x;                                  \
1681        }                                                       \
1682    }
1683VSPLTI(b, s8, int8_t)
1684VSPLTI(h, s16, int16_t)
1685VSPLTI(w, s32, int32_t)
1686#undef VSPLTI
1687
1688#define VSR(suffix, element, mask)                                      \
1689    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1690    {                                                                   \
1691        int i;                                                          \
1692                                                                        \
1693        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1694            unsigned int shift = b->element[i] & mask;                  \
1695            r->element[i] = a->element[i] >> shift;                     \
1696        }                                                               \
1697    }
1698VSR(ab, s8, 0x7)
1699VSR(ah, s16, 0xF)
1700VSR(aw, s32, 0x1F)
1701VSR(ad, s64, 0x3F)
1702VSR(b, u8, 0x7)
1703VSR(h, u16, 0xF)
1704VSR(w, u32, 0x1F)
1705VSR(d, u64, 0x3F)
1706#undef VSR
1707
1708void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1709{
1710    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
1711
1712#if defined(HOST_WORDS_BIGENDIAN)
1713    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1714    memset(&r->u8[0], 0, sh);
1715#else
1716    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1717    memset(&r->u8[16 - sh], 0, sh);
1718#endif
1719}
1720
1721void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1722{
1723    int i;
1724
1725    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1726        r->u32[i] = a->u32[i] >= b->u32[i];
1727    }
1728}
1729
1730void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1731{
1732    int64_t t;
1733    int i, upper;
1734    ppc_avr_t result;
1735    int sat = 0;
1736
1737#if defined(HOST_WORDS_BIGENDIAN)
1738    upper = ARRAY_SIZE(r->s32)-1;
1739#else
1740    upper = 0;
1741#endif
1742    t = (int64_t)b->s32[upper];
1743    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1744        t += a->s32[i];
1745        result.s32[i] = 0;
1746    }
1747    result.s32[upper] = cvtsdsw(t, &sat);
1748    *r = result;
1749
1750    if (sat) {
1751        env->vscr |= (1 << VSCR_SAT);
1752    }
1753}
1754
1755void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1756{
1757    int i, j, upper;
1758    ppc_avr_t result;
1759    int sat = 0;
1760
1761#if defined(HOST_WORDS_BIGENDIAN)
1762    upper = 1;
1763#else
1764    upper = 0;
1765#endif
1766    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1767        int64_t t = (int64_t)b->s32[upper + i * 2];
1768
1769        result.u64[i] = 0;
1770        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1771            t += a->s32[2 * i + j];
1772        }
1773        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
1774    }
1775
1776    *r = result;
1777    if (sat) {
1778        env->vscr |= (1 << VSCR_SAT);
1779    }
1780}
1781
1782void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1783{
1784    int i, j;
1785    int sat = 0;
1786
1787    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1788        int64_t t = (int64_t)b->s32[i];
1789
1790        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1791            t += a->s8[4 * i + j];
1792        }
1793        r->s32[i] = cvtsdsw(t, &sat);
1794    }
1795
1796    if (sat) {
1797        env->vscr |= (1 << VSCR_SAT);
1798    }
1799}
1800
1801void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1802{
1803    int sat = 0;
1804    int i;
1805
1806    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1807        int64_t t = (int64_t)b->s32[i];
1808
1809        t += a->s16[2 * i] + a->s16[2 * i + 1];
1810        r->s32[i] = cvtsdsw(t, &sat);
1811    }
1812
1813    if (sat) {
1814        env->vscr |= (1 << VSCR_SAT);
1815    }
1816}
1817
1818void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1819{
1820    int i, j;
1821    int sat = 0;
1822
1823    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1824        uint64_t t = (uint64_t)b->u32[i];
1825
1826        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1827            t += a->u8[4 * i + j];
1828        }
1829        r->u32[i] = cvtuduw(t, &sat);
1830    }
1831
1832    if (sat) {
1833        env->vscr |= (1 << VSCR_SAT);
1834    }
1835}
1836
1837#if defined(HOST_WORDS_BIGENDIAN)
1838#define UPKHI 1
1839#define UPKLO 0
1840#else
1841#define UPKHI 0
1842#define UPKLO 1
1843#endif
1844#define VUPKPX(suffix, hi)                                              \
1845    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
1846    {                                                                   \
1847        int i;                                                          \
1848        ppc_avr_t result;                                               \
1849                                                                        \
1850        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
1851            uint16_t e = b->u16[hi ? i : i+4];                          \
1852            uint8_t a = (e >> 15) ? 0xff : 0;                           \
1853            uint8_t r = (e >> 10) & 0x1f;                               \
1854            uint8_t g = (e >> 5) & 0x1f;                                \
1855            uint8_t b = e & 0x1f;                                       \
1856                                                                        \
1857            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
1858        }                                                               \
1859        *r = result;                                                    \
1860    }
1861VUPKPX(lpx, UPKLO)
1862VUPKPX(hpx, UPKHI)
1863#undef VUPKPX
1864
1865#define VUPK(suffix, unpacked, packee, hi)                              \
1866    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
1867    {                                                                   \
1868        int i;                                                          \
1869        ppc_avr_t result;                                               \
1870                                                                        \
1871        if (hi) {                                                       \
1872            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
1873                result.unpacked[i] = b->packee[i];                      \
1874            }                                                           \
1875        } else {                                                        \
1876            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1877                 i++) {                                                 \
1878                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1879            }                                                           \
1880        }                                                               \
1881        *r = result;                                                    \
1882    }
1883VUPK(hsb, s16, s8, UPKHI)
1884VUPK(hsh, s32, s16, UPKHI)
1885VUPK(hsw, s64, s32, UPKHI)
1886VUPK(lsb, s16, s8, UPKLO)
1887VUPK(lsh, s32, s16, UPKLO)
1888VUPK(lsw, s64, s32, UPKLO)
1889#undef VUPK
1890#undef UPKHI
1891#undef UPKLO
1892
1893#define VGENERIC_DO(name, element)                                      \
1894    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
1895    {                                                                   \
1896        int i;                                                          \
1897                                                                        \
1898        VECTOR_FOR_INORDER_I(i, element) {                              \
1899            r->element[i] = name(b->element[i]);                        \
1900        }                                                               \
1901    }
1902
1903#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1904#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1905#define clzw(v) clz32((v))
1906#define clzd(v) clz64((v))
1907
1908VGENERIC_DO(clzb, u8)
1909VGENERIC_DO(clzh, u16)
1910VGENERIC_DO(clzw, u32)
1911VGENERIC_DO(clzd, u64)
1912
1913#undef clzb
1914#undef clzh
1915#undef clzw
1916#undef clzd
1917
1918#define popcntb(v) ctpop8(v)
1919#define popcnth(v) ctpop16(v)
1920#define popcntw(v) ctpop32(v)
1921#define popcntd(v) ctpop64(v)
1922
1923VGENERIC_DO(popcntb, u8)
1924VGENERIC_DO(popcnth, u16)
1925VGENERIC_DO(popcntw, u32)
1926VGENERIC_DO(popcntd, u64)
1927
1928#undef popcntb
1929#undef popcnth
1930#undef popcntw
1931#undef popcntd
1932
1933#undef VGENERIC_DO
1934
/*
 * Initializer for a ppc_avr_t holding the 128-bit value 1: the 1 goes in
 * the second u64 on big-endian hosts and in the first on little-endian
 * hosts.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif
1940
1941#ifndef CONFIG_INT128
1942
1943static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1944{
1945    t->u64[0] = ~a.u64[0];
1946    t->u64[1] = ~a.u64[1];
1947}
1948
1949static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1950{
1951    if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
1952        return -1;
1953    } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
1954        return 1;
1955    } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
1956        return -1;
1957    } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
1958        return 1;
1959    } else {
1960        return 0;
1961    }
1962}
1963
1964static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1965{
1966    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1967    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1968                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1969}
1970
1971static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1972{
1973    ppc_avr_t not_a;
1974    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
1975    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
1976                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
1977    avr_qw_not(&not_a, a);
1978    return avr_qw_cmpu(not_a, b) < 0;
1979}
1980
1981#endif
1982
1983void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1984{
1985#ifdef CONFIG_INT128
1986    r->u128 = a->u128 + b->u128;
1987#else
1988    avr_qw_add(r, *a, *b);
1989#endif
1990}
1991
1992void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1993{
1994#ifdef CONFIG_INT128
1995    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1996#else
1997
1998    if (c->u64[LO_IDX] & 1) {
1999        ppc_avr_t tmp;
2000
2001        tmp.u64[HI_IDX] = 0;
2002        tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2003        avr_qw_add(&tmp, *a, tmp);
2004        avr_qw_add(r, tmp, *b);
2005    } else {
2006        avr_qw_add(r, *a, *b);
2007    }
2008#endif
2009}
2010
2011void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2012{
2013#ifdef CONFIG_INT128
2014    r->u128 = (~a->u128 < b->u128);
2015#else
2016    ppc_avr_t not_a;
2017
2018    avr_qw_not(&not_a, *a);
2019
2020    r->u64[HI_IDX] = 0;
2021    r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2022#endif
2023}
2024
2025void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2026{
2027#ifdef CONFIG_INT128
2028    int carry_out = (~a->u128 < b->u128);
2029    if (!carry_out && (c->u128 & 1)) {
2030        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2031                    ((a->u128 != 0) || (b->u128 != 0));
2032    }
2033    r->u128 = carry_out;
2034#else
2035
2036    int carry_in = c->u64[LO_IDX] & 1;
2037    int carry_out = 0;
2038    ppc_avr_t tmp;
2039
2040    carry_out = avr_qw_addc(&tmp, *a, *b);
2041
2042    if (!carry_out && carry_in) {
2043        ppc_avr_t one = QW_ONE;
2044        carry_out = avr_qw_addc(&tmp, tmp, one);
2045    }
2046    r->u64[HI_IDX] = 0;
2047    r->u64[LO_IDX] = carry_out;
2048#endif
2049}
2050
2051void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2052{
2053#ifdef CONFIG_INT128
2054    r->u128 = a->u128 - b->u128;
2055#else
2056    ppc_avr_t tmp;
2057    ppc_avr_t one = QW_ONE;
2058
2059    avr_qw_not(&tmp, *b);
2060    avr_qw_add(&tmp, *a, tmp);
2061    avr_qw_add(r, tmp, one);
2062#endif
2063}
2064
2065void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2066{
2067#ifdef CONFIG_INT128
2068    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2069#else
2070    ppc_avr_t tmp, sum;
2071
2072    avr_qw_not(&tmp, *b);
2073    avr_qw_add(&sum, *a, tmp);
2074
2075    tmp.u64[HI_IDX] = 0;
2076    tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2077    avr_qw_add(r, sum, tmp);
2078#endif
2079}
2080
2081void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2082{
2083#ifdef CONFIG_INT128
2084    r->u128 = (~a->u128 < ~b->u128) ||
2085                 (a->u128 + ~b->u128 == (__uint128_t)-1);
2086#else
2087    int carry = (avr_qw_cmpu(*a, *b) > 0);
2088    if (!carry) {
2089        ppc_avr_t tmp;
2090        avr_qw_not(&tmp, *b);
2091        avr_qw_add(&tmp, *a, tmp);
2092        carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2093    }
2094    r->u64[HI_IDX] = 0;
2095    r->u64[LO_IDX] = carry;
2096#endif
2097}
2098
2099void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2100{
2101#ifdef CONFIG_INT128
2102    r->u128 =
2103        (~a->u128 < ~b->u128) ||
2104        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2105#else
2106    int carry_in = c->u64[LO_IDX] & 1;
2107    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2108    if (!carry_out && carry_in) {
2109        ppc_avr_t tmp;
2110        avr_qw_not(&tmp, *b);
2111        avr_qw_add(&tmp, *a, tmp);
2112        carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2113    }
2114
2115    r->u64[HI_IDX] = 0;
2116    r->u64[LO_IDX] = carry_out;
2117#endif
2118}
2119
/*
 * Sign codes found in the sign nibble (digit 0) of a packed BCD value.
 * bcd_get_sgn() treats the four PLUS codes as positive and the two NEG
 * codes as negative; bcd_preferred_sgn() only ever produces the PREF codes.
 */
#define BCD_PLUS_ALT_1  0xA
#define BCD_NEG_ALT     0xB
#define BCD_PLUS_PREF_1 0xC
#define BCD_NEG_PREF    0xD
#define BCD_PLUS_ALT_2  0xE
#define BCD_PLUS_PREF_2 0xF

/*
 * Index of the byte that holds BCD digit n (two digits per byte).  Digit 0
 * lives in byte 15 on big-endian hosts and in byte 0 on little-endian hosts.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
#else
#define BCD_DIG_BYTE(n) ((n) / 2)
#endif
2132
2133static int bcd_get_sgn(ppc_avr_t *bcd)
2134{
2135    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2136    case BCD_PLUS_PREF_1:
2137    case BCD_PLUS_PREF_2:
2138    case BCD_PLUS_ALT_1:
2139    case BCD_PLUS_ALT_2:
2140    {
2141        return 1;
2142    }
2143
2144    case BCD_NEG_PREF:
2145    case BCD_NEG_ALT:
2146    {
2147        return -1;
2148    }
2149
2150    default:
2151    {
2152        return 0;
2153    }
2154    }
2155}
2156
2157static int bcd_preferred_sgn(int sgn, int ps)
2158{
2159    if (sgn >= 0) {
2160        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2161    } else {
2162        return BCD_NEG_PREF;
2163    }
2164}
2165
2166static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2167{
2168    uint8_t result;
2169    if (n & 1) {
2170        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2171    } else {
2172       result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2173    }
2174
2175    if (unlikely(result > 9)) {
2176        *invalid = true;
2177    }
2178    return result;
2179}
2180
2181static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2182{
2183    if (n & 1) {
2184        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2185        bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2186    } else {
2187        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2188        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2189    }
2190}
2191
2192static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2193{
2194    int i;
2195    int invalid = 0;
2196    for (i = 31; i > 0; i--) {
2197        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2198        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2199        if (unlikely(invalid)) {
2200            return 0; /* doesn't matter */
2201        } else if (dig_a > dig_b) {
2202            return 1;
2203        } else if (dig_a < dig_b) {
2204            return -1;
2205        }
2206    }
2207
2208    return 0;
2209}
2210
2211static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2212                       int *overflow)
2213{
2214    int carry = 0;
2215    int i;
2216    int is_zero = 1;
2217    for (i = 1; i <= 31; i++) {
2218        uint8_t digit = bcd_get_digit(a, i, invalid) +
2219                        bcd_get_digit(b, i, invalid) + carry;
2220        is_zero &= (digit == 0);
2221        if (digit > 9) {
2222            carry = 1;
2223            digit -= 10;
2224        } else {
2225            carry = 0;
2226        }
2227
2228        bcd_put_digit(t, digit, i);
2229
2230        if (unlikely(*invalid)) {
2231            return -1;
2232        }
2233    }
2234
2235    *overflow = carry;
2236    return is_zero;
2237}
2238
2239static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2240                       int *overflow)
2241{
2242    int carry = 0;
2243    int i;
2244    int is_zero = 1;
2245    for (i = 1; i <= 31; i++) {
2246        uint8_t digit = bcd_get_digit(a, i, invalid) -
2247                        bcd_get_digit(b, i, invalid) + carry;
2248        is_zero &= (digit == 0);
2249        if (digit & 0x80) {
2250            carry = -1;
2251            digit += 10;
2252        } else {
2253            carry = 0;
2254        }
2255
2256        bcd_put_digit(t, digit, i);
2257
2258        if (unlikely(*invalid)) {
2259            return -1;
2260        }
2261    }
2262
2263    *overflow = carry;
2264    return is_zero;
2265}
2266
2267uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2268{
2269
2270    int sgna = bcd_get_sgn(a);
2271    int sgnb = bcd_get_sgn(b);
2272    int invalid = (sgna == 0) || (sgnb == 0);
2273    int overflow = 0;
2274    int zero = 0;
2275    uint32_t cr = 0;
2276    ppc_avr_t result = { .u64 = { 0, 0 } };
2277
2278    if (!invalid) {
2279        if (sgna == sgnb) {
2280            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2281            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2282            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2283        } else if (bcd_cmp_mag(a, b) > 0) {
2284            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2285            zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2286            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2287        } else {
2288            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2289            zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2290            cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2291        }
2292    }
2293
2294    if (unlikely(invalid)) {
2295        result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2296        cr = 1 << CRF_SO;
2297    } else if (overflow) {
2298        cr |= 1 << CRF_SO;
2299    } else if (zero) {
2300        cr = 1 << CRF_EQ;
2301    }
2302
2303    *r = result;
2304
2305    return cr;
2306}
2307
2308uint32_t helper_bcdsub(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2309{
2310    ppc_avr_t bcopy = *b;
2311    int sgnb = bcd_get_sgn(b);
2312    if (sgnb < 0) {
2313        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2314    } else if (sgnb > 0) {
2315        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2316    }
2317    /* else invalid ... defer to bcdadd code for proper handling */
2318
2319    return helper_bcdadd(r, a, &bcopy, ps);
2320}
2321
2322void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2323{
2324    int i;
2325    VECTOR_FOR_INORDER_I(i, u8) {
2326        r->u8[i] = AES_sbox[a->u8[i]];
2327    }
2328}
2329
2330void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2331{
2332    ppc_avr_t result;
2333    int i;
2334
2335    VECTOR_FOR_INORDER_I(i, u32) {
2336        result.AVRW(i) = b->AVRW(i) ^
2337            (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2338             AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2339             AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2340             AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2341    }
2342    *r = result;
2343}
2344
2345void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2346{
2347    ppc_avr_t result;
2348    int i;
2349
2350    VECTOR_FOR_INORDER_I(i, u8) {
2351        result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
2352    }
2353    *r = result;
2354}
2355
2356void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2357{
2358    /* This differs from what is written in ISA V2.07.  The RTL is */
2359    /* incorrect and will be fixed in V2.07B.                      */
2360    int i;
2361    ppc_avr_t tmp;
2362
2363    VECTOR_FOR_INORDER_I(i, u8) {
2364        tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2365    }
2366
2367    VECTOR_FOR_INORDER_I(i, u32) {
2368        r->AVRW(i) =
2369            AES_imc[tmp.AVRB(4*i + 0)][0] ^
2370            AES_imc[tmp.AVRB(4*i + 1)][1] ^
2371            AES_imc[tmp.AVRB(4*i + 2)][2] ^
2372            AES_imc[tmp.AVRB(4*i + 3)][3];
2373    }
2374}
2375
2376void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2377{
2378    ppc_avr_t result;
2379    int i;
2380
2381    VECTOR_FOR_INORDER_I(i, u8) {
2382        result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
2383    }
2384    *r = result;
2385}
2386
2387#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
2388#if defined(HOST_WORDS_BIGENDIAN)
2389#define EL_IDX(i) (i)
2390#else
2391#define EL_IDX(i) (3 - (i))
2392#endif
2393
2394void helper_vshasigmaw(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
2395{
2396    int st = (st_six & 0x10) != 0;
2397    int six = st_six & 0xF;
2398    int i;
2399
2400    VECTOR_FOR_INORDER_I(i, u32) {
2401        if (st == 0) {
2402            if ((six & (0x8 >> i)) == 0) {
2403                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2404                                    ROTRu32(a->u32[EL_IDX(i)], 18) ^
2405                                    (a->u32[EL_IDX(i)] >> 3);
2406            } else { /* six.bit[i] == 1 */
2407                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2408                                    ROTRu32(a->u32[EL_IDX(i)], 19) ^
2409                                    (a->u32[EL_IDX(i)] >> 10);
2410            }
2411        } else { /* st == 1 */
2412            if ((six & (0x8 >> i)) == 0) {
2413                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2414                                    ROTRu32(a->u32[EL_IDX(i)], 13) ^
2415                                    ROTRu32(a->u32[EL_IDX(i)], 22);
2416            } else { /* six.bit[i] == 1 */
2417                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2418                                    ROTRu32(a->u32[EL_IDX(i)], 11) ^
2419                                    ROTRu32(a->u32[EL_IDX(i)], 25);
2420            }
2421        }
2422    }
2423}
2424
2425#undef ROTRu32
2426#undef EL_IDX
2427
2428#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
2429#if defined(HOST_WORDS_BIGENDIAN)
2430#define EL_IDX(i) (i)
2431#else
2432#define EL_IDX(i) (1 - (i))
2433#endif
2434
2435void helper_vshasigmad(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
2436{
2437    int st = (st_six & 0x10) != 0;
2438    int six = st_six & 0xF;
2439    int i;
2440
2441    VECTOR_FOR_INORDER_I(i, u64) {
2442        if (st == 0) {
2443            if ((six & (0x8 >> (2*i))) == 0) {
2444                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
2445                                    ROTRu64(a->u64[EL_IDX(i)], 8) ^
2446                                    (a->u64[EL_IDX(i)] >> 7);
2447            } else { /* six.bit[2*i] == 1 */
2448                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
2449                                    ROTRu64(a->u64[EL_IDX(i)], 61) ^
2450                                    (a->u64[EL_IDX(i)] >> 6);
2451            }
2452        } else { /* st == 1 */
2453            if ((six & (0x8 >> (2*i))) == 0) {
2454                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
2455                                    ROTRu64(a->u64[EL_IDX(i)], 34) ^
2456                                    ROTRu64(a->u64[EL_IDX(i)], 39);
2457            } else { /* six.bit[2*i] == 1 */
2458                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
2459                                    ROTRu64(a->u64[EL_IDX(i)], 18) ^
2460                                    ROTRu64(a->u64[EL_IDX(i)], 41);
2461            }
2462        }
2463    }
2464}
2465
2466#undef ROTRu64
2467#undef EL_IDX
2468
2469void helper_vpermxor(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2470{
2471    ppc_avr_t result;
2472    int i;
2473
2474    VECTOR_FOR_INORDER_I(i, u8) {
2475        int indexA = c->u8[i] >> 4;
2476        int indexB = c->u8[i] & 0xF;
2477#if defined(HOST_WORDS_BIGENDIAN)
2478        result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
2479#else
2480        result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
2481#endif
2482    }
2483    *r = result;
2484}
2485
2486#undef VECTOR_FOR_INORDER_I
2487#undef HI_IDX
2488#undef LO_IDX
2489
2490/*****************************************************************************/
2491/* SPE extension helpers */
2492/* Use a table to make this quicker */
/* hbrev[n] is the 4-bit value n with its bit order reversed. */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/*
 * Reverses the bit order of one byte: each nibble is reversed through
 * hbrev[] and the two nibbles swap places.
 */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/*
 * Reverses the bit order of a 32-bit word: every byte is bit-reversed and
 * the byte order is swapped.
 *
 * Each reversed byte is widened to uint32_t before it is shifted.  Without
 * that, the uint8_t is promoted to int, and shifting a value with bit 7 set
 * left by 24 overflows a signed int (undefined behaviour).
 */
static inline uint32_t word_reverse(uint32_t val)
{
    const uint32_t b0 = byte_reverse(val >> 24);
    const uint32_t b1 = byte_reverse(val >> 16);
    const uint32_t b2 = byte_reverse(val >> 8);
    const uint32_t b3 = byte_reverse(val);

    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}
2508
2509#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
2510target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2511{
2512    uint32_t a, b, d, mask;
2513
2514    mask = UINT32_MAX >> (32 - MASKBITS);
2515    a = arg1 & mask;
2516    b = arg2 & mask;
2517    d = word_reverse(1 + word_reverse(a | ~b));
2518    return (arg1 & ~mask) | (d & b);
2519}
2520
/*
 * Returns clz32() of val when its top bit is clear, and clz32() of ~val
 * when its top bit is set.
 */
uint32_t helper_cntlsw32(uint32_t val)
{
    const uint32_t probe = (val & 0x80000000) ? ~val : val;

    return clz32(probe);
}
2529
/* Returns clz32() of val. */
uint32_t helper_cntlzw32(uint32_t val)
{
    const uint32_t leading = clz32(val);

    return leading;
}
2534
2535/* 440 specific */
2536target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2537                          target_ulong low, uint32_t update_Rc)
2538{
2539    target_ulong mask;
2540    int i;
2541
2542    i = 1;
2543    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2544        if ((high & mask) == 0) {
2545            if (update_Rc) {
2546                env->crf[0] = 0x4;
2547            }
2548            goto done;
2549        }
2550        i++;
2551    }
2552    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2553        if ((low & mask) == 0) {
2554            if (update_Rc) {
2555                env->crf[0] = 0x8;
2556            }
2557            goto done;
2558        }
2559        i++;
2560    }
2561    i = 8;
2562    if (update_Rc) {
2563        env->crf[0] = 0x2;
2564    }
2565 done:
2566    env->xer = (env->xer & ~0x7F) | i;
2567    if (update_Rc) {
2568        env->crf[0] |= xer_so;
2569    }
2570    return i;
2571}
2572