/* qemu/target/ppc/int_helper.c */
   1/*
   2 *  PowerPC integer and vector emulation helpers for QEMU.
   3 *
   4 *  Copyright (c) 2003-2007 Jocelyn Mayer
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20#include "cpu.h"
  21#include "internal.h"
  22#include "qemu/host-utils.h"
  23#include "exec/helper-proto.h"
  24#include "crypto/aes.h"
  25#include "fpu/softfloat.h"
  26
  27#include "helper_regs.h"
  28/*****************************************************************************/
  29/* Fixed point operations helpers */
  30
  31static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
  32{
  33    if (unlikely(ov)) {
  34        env->so = env->ov = 1;
  35    } else {
  36        env->ov = 0;
  37    }
  38}
  39
  40target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
  41                           uint32_t oe)
  42{
  43    uint64_t rt = 0;
  44    int overflow = 0;
  45
  46    uint64_t dividend = (uint64_t)ra << 32;
  47    uint64_t divisor = (uint32_t)rb;
  48
  49    if (unlikely(divisor == 0)) {
  50        overflow = 1;
  51    } else {
  52        rt = dividend / divisor;
  53        overflow = rt > UINT32_MAX;
  54    }
  55
  56    if (unlikely(overflow)) {
  57        rt = 0; /* Undefined */
  58    }
  59
  60    if (oe) {
  61        helper_update_ov_legacy(env, overflow);
  62    }
  63
  64    return (target_ulong)rt;
  65}
  66
  67target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
  68                          uint32_t oe)
  69{
  70    int64_t rt = 0;
  71    int overflow = 0;
  72
  73    int64_t dividend = (int64_t)ra << 32;
  74    int64_t divisor = (int64_t)((int32_t)rb);
  75
  76    if (unlikely((divisor == 0) ||
  77                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
  78        overflow = 1;
  79    } else {
  80        rt = dividend / divisor;
  81        overflow = rt != (int32_t)rt;
  82    }
  83
  84    if (unlikely(overflow)) {
  85        rt = 0; /* Undefined */
  86    }
  87
  88    if (oe) {
  89        helper_update_ov_legacy(env, overflow);
  90    }
  91
  92    return (target_ulong)rt;
  93}
  94
  95#if defined(TARGET_PPC64)
  96
  97uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
  98{
  99    uint64_t rt = 0;
 100    int overflow = 0;
 101
 102    overflow = divu128(&rt, &ra, rb);
 103
 104    if (unlikely(overflow)) {
 105        rt = 0; /* Undefined */
 106    }
 107
 108    if (oe) {
 109        helper_update_ov_legacy(env, overflow);
 110    }
 111
 112    return rt;
 113}
 114
 115uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
 116{
 117    int64_t rt = 0;
 118    int64_t ra = (int64_t)rau;
 119    int64_t rb = (int64_t)rbu;
 120    int overflow = divs128(&rt, &ra, rb);
 121
 122    if (unlikely(overflow)) {
 123        rt = 0; /* Undefined */
 124    }
 125
 126    if (oe) {
 127        helper_update_ov_legacy(env, overflow);
 128    }
 129
 130    return rt;
 131}
 132
 133#endif
 134
 135
 136#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab (the byte replicated 8 times) */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/* subtract 1 from each byte, and with inverse, check if MSB is set at each
 * byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))

/* cmpeqb: report GT in the CR field if any byte of rb equals the low
 * byte of ra, using the classic SWAR byte-search trick above. */
uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue
 158
 159/* Return invalid random number.
 160 *
 161 * FIXME: Add rng backend or other mechanism to get cryptographically suitable
 162 * random number
 163 */
 164target_ulong helper_darn32(void)
 165{
 166    return -1;
 167}
 168
 169target_ulong helper_darn64(void)
 170{
 171    return -1;
 172}
 173
 174#endif
 175
 176#if defined(TARGET_PPC64)
 177
 178uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
 179{
 180    int i;
 181    uint64_t ra = 0;
 182
 183    for (i = 0; i < 8; i++) {
 184        int index = (rs >> (i*8)) & 0xFF;
 185        if (index < 64) {
 186            if (rb & PPC_BIT(index)) {
 187                ra |= 1 << i;
 188            }
 189        }
 190    }
 191    return ra;
 192}
 193
 194#endif
 195
 196target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
 197{
 198    target_ulong mask = 0xff;
 199    target_ulong ra = 0;
 200    int i;
 201
 202    for (i = 0; i < sizeof(target_ulong); i++) {
 203        if ((rs & mask) == (rb & mask)) {
 204            ra |= mask;
 205        }
 206        mask <<= 8;
 207    }
 208    return ra;
 209}
 210
 211/* shift right arithmetic helper */
 212target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
 213                         target_ulong shift)
 214{
 215    int32_t ret;
 216
 217    if (likely(!(shift & 0x20))) {
 218        if (likely((uint32_t)shift != 0)) {
 219            shift &= 0x1f;
 220            ret = (int32_t)value >> shift;
 221            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
 222                env->ca32 = env->ca = 0;
 223            } else {
 224                env->ca32 = env->ca = 1;
 225            }
 226        } else {
 227            ret = (int32_t)value;
 228            env->ca32 = env->ca = 0;
 229        }
 230    } else {
 231        ret = (int32_t)value >> 31;
 232        env->ca32 = env->ca = (ret != 0);
 233    }
 234    return (target_long)ret;
 235}
 236
 237#if defined(TARGET_PPC64)
 238target_ulong helper_srad(CPUPPCState *env, target_ulong value,
 239                         target_ulong shift)
 240{
 241    int64_t ret;
 242
 243    if (likely(!(shift & 0x40))) {
 244        if (likely((uint64_t)shift != 0)) {
 245            shift &= 0x3f;
 246            ret = (int64_t)value >> shift;
 247            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
 248                env->ca32 = env->ca = 0;
 249            } else {
 250                env->ca32 = env->ca = 1;
 251            }
 252        } else {
 253            ret = (int64_t)value;
 254            env->ca32 = env->ca = 0;
 255        }
 256    } else {
 257        ret = (int64_t)value >> 63;
 258        env->ca32 = env->ca = (ret != 0);
 259    }
 260    return ret;
 261}
 262#endif
 263
 264#if defined(TARGET_PPC64)
 265target_ulong helper_popcntb(target_ulong val)
 266{
 267    /* Note that we don't fold past bytes */
 268    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
 269                                           0x5555555555555555ULL);
 270    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
 271                                           0x3333333333333333ULL);
 272    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
 273                                           0x0f0f0f0f0f0f0f0fULL);
 274    return val;
 275}
 276
 277target_ulong helper_popcntw(target_ulong val)
 278{
 279    /* Note that we don't fold past words.  */
 280    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
 281                                           0x5555555555555555ULL);
 282    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
 283                                           0x3333333333333333ULL);
 284    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
 285                                           0x0f0f0f0f0f0f0f0fULL);
 286    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
 287                                           0x00ff00ff00ff00ffULL);
 288    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
 289                                           0x0000ffff0000ffffULL);
 290    return val;
 291}
 292#else
 293target_ulong helper_popcntb(target_ulong val)
 294{
 295    /* Note that we don't fold past bytes */
 296    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
 297    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
 298    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
 299    return val;
 300}
 301#endif
 302
 303/*****************************************************************************/
 304/* PowerPC 601 specific instructions (POWER bridge) */
/*
 * POWER (601) "div": divide the 64-bit value (arg1 || MQ) by the low
 * word of arg2.  The quotient is returned and the remainder is left in
 * the MQ SPR.  Divide by zero and INT32_MIN / -1 clear MQ and return
 * INT32_MIN.
 */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        /* NOTE(review): the remainder uses an unsigned %, the quotient a
         * signed divisor -- presumably mirrors 601 behaviour; confirm
         * against hardware docs before changing. */
        env->spr[SPR_MQ] = tmp % arg2;
        return  tmp / (int32_t)arg2;
    }
}
 318
/*
 * POWER (601) "divo": as helper_div, but also updates OV/SO.  Overflow is
 * flagged when dividing by zero, for INT32_MIN / -1, or when the quotient
 * does not fit in 32 signed bits.
 */
target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        /* NOTE(review): remainder is computed with unsigned %, quotient
         * with a signed divisor -- presumably matches 601 hardware. */
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            /* Quotient does not fit in 32 bits: overflow */
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}
 340
 341target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
 342                         target_ulong arg2)
 343{
 344    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
 345        (int32_t)arg2 == 0) {
 346        env->spr[SPR_MQ] = 0;
 347        return INT32_MIN;
 348    } else {
 349        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
 350        return (int32_t)arg1 / (int32_t)arg2;
 351    }
 352}
 353
 354target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
 355                          target_ulong arg2)
 356{
 357    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
 358        (int32_t)arg2 == 0) {
 359        env->so = env->ov = 1;
 360        env->spr[SPR_MQ] = 0;
 361        return INT32_MIN;
 362    } else {
 363        env->ov = 0;
 364        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
 365        return (int32_t)arg1 / (int32_t)arg2;
 366    }
 367}
 368
 369/*****************************************************************************/
 370/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/* Extremely decomposed:
 *                      -arg / 256
 * return 256 * log10(10           + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
/* The generated table provides the 602 entries for arg in [0, 602) */
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        /* Out-of-range arguments read as zero */
        return 0;
    }
}
#endif
 388
 389/*****************************************************************************/
 390/* Altivec extension helpers */
 391#if defined(HOST_WORDS_BIGENDIAN)
 392#define HI_IDX 0
 393#define LO_IDX 1
 394#define AVRB(i) u8[i]
 395#define AVRW(i) u32[i]
 396#else
 397#define HI_IDX 1
 398#define LO_IDX 0
 399#define AVRB(i) u8[15-(i)]
 400#define AVRW(i) u32[3-(i)]
 401#endif
 402
 403#if defined(HOST_WORDS_BIGENDIAN)
 404#define VECTOR_FOR_INORDER_I(index, element)                    \
 405    for (index = 0; index < ARRAY_SIZE(r->element); index++)
 406#else
 407#define VECTOR_FOR_INORDER_I(index, element)                    \
 408    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
 409#endif
 410
/* Saturating arithmetic helpers.  */
/*
 * SATCVT/SATCVTU expand to cvt<from><to>() converters that clamp a wider
 * value into a narrower type, setting *sat when clamping occurs.  The
 * unsigned variant only needs the upper-bound check.
 */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
/* Signed source -> narrower signed destination */
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

/* Unsigned source -> narrower unsigned destination */
SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
/* Signed source -> unsigned destination (negative values clamp to 0) */
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
 453
 454void helper_lvsl(ppc_avr_t *r, target_ulong sh)
 455{
 456    int i, j = (sh & 0xf);
 457
 458    VECTOR_FOR_INORDER_I(i, u8) {
 459        r->u8[i] = j++;
 460    }
 461}
 462
 463void helper_lvsr(ppc_avr_t *r, target_ulong sh)
 464{
 465    int i, j = 0x10 - (sh & 0xf);
 466
 467    VECTOR_FOR_INORDER_I(i, u8) {
 468        r->u8[i] = j++;
 469    }
 470}
 471
 472void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
 473{
 474#if defined(HOST_WORDS_BIGENDIAN)
 475    env->vscr = r->u32[3];
 476#else
 477    env->vscr = r->u32[0];
 478#endif
 479    set_flush_to_zero(vscr_nj, &env->vec_status);
 480}
 481
 482void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 483{
 484    int i;
 485
 486    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
 487        r->u32[i] = ~a->u32[i] < b->u32[i];
 488    }
 489}
 490
 491/* vprtybw */
 492void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
 493{
 494    int i;
 495    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
 496        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
 497        res ^= res >> 8;
 498        r->u32[i] = res & 1;
 499    }
 500}
 501
 502/* vprtybd */
 503void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
 504{
 505    int i;
 506    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
 507        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
 508        res ^= res >> 16;
 509        res ^= res >> 8;
 510        r->u64[i] = res & 1;
 511    }
 512}
 513
 514/* vprtybq */
 515void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
 516{
 517    uint64_t res = b->u64[0] ^ b->u64[1];
 518    res ^= res >> 32;
 519    res ^= res >> 16;
 520    res ^= res >> 8;
 521    r->u64[LO_IDX] = res & 1;
 522    r->u64[HI_IDX] = 0;
 523}
 524
/* Element-wise modular arithmetic over each integer lane of the vector;
 * VARITH instantiates matched add/sub pairs per element width. */
#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
VARITH(udm, u64)
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH
 544
/* Element-wise float32 arithmetic through softfloat, so rounding mode and
 * exception flags come from env->vec_status. */
#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP
 560
/* Fused multiply-add per float32 lane: r = a * c + b, with the muladd
 * negation flags selecting the maddfp/nmsubfp variants. */
#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                           ppc_avr_t *b, ppc_avr_t *c)                  \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
                                     type, &env->vec_status);           \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA
 574
/*
 * Saturating element-wise add/subtract: the operation is evaluated in the
 * wider type "optype", clamped back to the element width by the cvt
 * helper, and VSCR[SAT] is set if any lane saturated.
 */
#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED
 621
/* Rounded average per lane: (a + b + 1) >> 1, computed in the wider type
 * "etype" so the intermediate sum cannot overflow. */
#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG
 642
/* |a - b| per unsigned lane, computed branch-safely by subtracting the
 * smaller operand from the larger. */
#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
            (a->element[i] - b->element[i]) :                           \
            (b->element[i] - a->element[i]);                            \
    }                                                                   \
}

/* VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU
 666
/* vcfux/vcfsx: convert each (un)signed 32-bit lane to float32 and scale
 * the result by 2^-uim via float32_scalbn. */
#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF
 681
/*
 * Element-wise integer compare: matching lanes are set to all-ones,
 * others to zero.  The "_dot" (record) forms additionally write CR6:
 * bit 3 set if every lane matched, bit 1 set if no lane matched.
 */
#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP
 732
/*
 * Element-wise not-equal compare.  With cmpzero set (the vcmpnez* forms)
 * a lane also matches when either operand's lane is zero.  The "_dot"
 * forms record the all/none summary in CR6 (bit 3 = all, bit 1 = none).
 */
#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,              \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
{                                                                       \
    etype ones = (etype)-1;                                             \
    etype all = ones;                                                   \
    etype result, none = 0;                                             \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        if (cmpzero) {                                                  \
            result = ((a->element[i] == 0)                              \
                           || (b->element[i] == 0)                      \
                           || (a->element[i] != b->element[i]) ?        \
                           ones : 0x0);                                 \
        } else {                                                        \
            result = (a->element[i] != b->element[i]) ? ones : 0x0;     \
        }                                                               \
        r->element[i] = result;                                         \
        all &= result;                                                  \
        none |= result;                                                 \
    }                                                                   \
    if (record) {                                                       \
        env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);           \
    }                                                                   \
}

/* VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE
 775
/*
 * Element-wise float32 compare via softfloat's quiet compare: lanes are
 * set to all-ones on match, zero otherwise; unordered (NaN) lanes never
 * match.  The "_dot" forms record the all/none summary in CR6.
 * Note vcmpgefp is expressed as "relation != less" so that equal and
 * greater both match.
 */
#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP
 812
 813static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
 814                                    ppc_avr_t *a, ppc_avr_t *b, int record)
 815{
 816    int i;
 817    int all_in = 0;
 818
 819    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
 820        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
 821        if (le_rel == float_relation_unordered) {
 822            r->u32[i] = 0xc0000000;
 823            all_in = 1;
 824        } else {
 825            float32 bneg = float32_chs(b->f[i]);
 826            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
 827            int le = le_rel != float_relation_greater;
 828            int ge = ge_rel != float_relation_less;
 829
 830            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
 831            all_in |= (!le | !ge);
 832        }
 833    }
 834    if (record) {
 835        env->crf[6] = (all_in == 0) << 1;
 836    }
 837}
 838
/* vcmpbfp: Vector Compare Bounds Floating-Point, non-record form. */
void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}
 843
/* vcmpbfp.: record form of vcmpbfp — additionally updates CR6. */
void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}
 849
/*
 * VCT - Vector convert float32 to word integer with scaling and
 * saturation.
 *   suffix  - uxs (to unsigned word) or sxs (to signed word)
 *   satcvt  - saturating int64 -> destination-element conversion
 *   element - destination element field
 * Each source float is scaled by 2^uim, rounded toward zero, and
 * saturated; NaN inputs convert to 0.  Saturation sets VSCR[SAT].
 * The conversion goes via float64 so the scaling cannot lose bits.
 */
#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            if (float32_is_any_nan(b->f[i])) {                          \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f[i], &s);            \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT
 878
 879target_ulong helper_vclzlsbb(ppc_avr_t *r)
 880{
 881    target_ulong count = 0;
 882    int i;
 883    VECTOR_FOR_INORDER_I(i, u8) {
 884        if (r->u8[i] & 0x01) {
 885            break;
 886        }
 887        count++;
 888    }
 889    return count;
 890}
 891
/*
 * vctzlsbb: Vector Count Trailing Zero Least-Significant Bits Byte.
 * As vclzlsbb, but scanning from the architected trailing byte; the
 * loop direction flips with host byte order because the u8[] union
 * member is stored in host order.
 */
target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
#if defined(HOST_WORDS_BIGENDIAN)
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
#endif
        /* Stop at the first byte whose least-significant bit is set. */
        if (r->u8[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}
 908
 909void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 910                      ppc_avr_t *b, ppc_avr_t *c)
 911{
 912    int sat = 0;
 913    int i;
 914
 915    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
 916        int32_t prod = a->s16[i] * b->s16[i];
 917        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
 918
 919        r->s16[i] = cvtswsh(t, &sat);
 920    }
 921
 922    if (sat) {
 923        env->vscr |= (1 << VSCR_SAT);
 924    }
 925}
 926
 927void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 928                       ppc_avr_t *b, ppc_avr_t *c)
 929{
 930    int sat = 0;
 931    int i;
 932
 933    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
 934        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
 935        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
 936        r->s16[i] = cvtswsh(t, &sat);
 937    }
 938
 939    if (sat) {
 940        env->vscr |= (1 << VSCR_SAT);
 941    }
 942}
 943
/*
 * VMINMAX_DO - element-wise min/max core.
 *   compare - ">" yields the minimum (keep b when a > b),
 *             "<" yields the maximum (keep b when a < b)
 *   element - vector element field, which fixes the signedness
 */
#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
/* Expand the min and max helper for one element width/signedness. */
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(sd, s64)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
VMINMAX(ud, u64)
#undef VMINMAX_DO
#undef VMINMAX
 970
 971void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
 972{
 973    int i;
 974
 975    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
 976        int32_t prod = a->s16[i] * b->s16[i];
 977        r->s16[i] = (int16_t) (prod + c->s16[i]);
 978    }
 979}
 980
/*
 * VMRG_DO - vector merge core: interleave corresponding elements of a
 * and b into the destination.  The highp flag, combined with the
 * host-endian HI_IDX/LO_IDX and MRGHI/MRGLO values, selects whether the
 * architected high or low halves of the sources are merged.  A local
 * temporary is used so r may alias a or b.
 */
#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i*2+HI_IDX] = a->element[i];             \
                result.element[i*2+LO_IDX] = b->element[i];             \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
            result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
/*
 * NOTE(review): mrgl expands with MRGHI and mrgh with MRGLO — the names
 * look crossed, but this pairing is what produces the architected result
 * on both host byte orders; confirm against the ISA before "fixing".
 */
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO
1018
1019void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1020                     ppc_avr_t *b, ppc_avr_t *c)
1021{
1022    int32_t prod[16];
1023    int i;
1024
1025    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1026        prod[i] = (int32_t)a->s8[i] * b->u8[i];
1027    }
1028
1029    VECTOR_FOR_INORDER_I(i, s32) {
1030        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1031            prod[4 * i + 2] + prod[4 * i + 3];
1032    }
1033}
1034
1035void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1036                     ppc_avr_t *b, ppc_avr_t *c)
1037{
1038    int32_t prod[8];
1039    int i;
1040
1041    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1042        prod[i] = a->s16[i] * b->s16[i];
1043    }
1044
1045    VECTOR_FOR_INORDER_I(i, s32) {
1046        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1047    }
1048}
1049
1050void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1051                     ppc_avr_t *b, ppc_avr_t *c)
1052{
1053    int32_t prod[8];
1054    int i;
1055    int sat = 0;
1056
1057    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1058        prod[i] = (int32_t)a->s16[i] * b->s16[i];
1059    }
1060
1061    VECTOR_FOR_INORDER_I(i, s32) {
1062        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1063
1064        r->u32[i] = cvtsdsw(t, &sat);
1065    }
1066
1067    if (sat) {
1068        env->vscr |= (1 << VSCR_SAT);
1069    }
1070}
1071
1072void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1073                     ppc_avr_t *b, ppc_avr_t *c)
1074{
1075    uint16_t prod[16];
1076    int i;
1077
1078    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1079        prod[i] = a->u8[i] * b->u8[i];
1080    }
1081
1082    VECTOR_FOR_INORDER_I(i, u32) {
1083        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1084            prod[4 * i + 2] + prod[4 * i + 3];
1085    }
1086}
1087
1088void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1089                     ppc_avr_t *b, ppc_avr_t *c)
1090{
1091    uint32_t prod[8];
1092    int i;
1093
1094    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1095        prod[i] = a->u16[i] * b->u16[i];
1096    }
1097
1098    VECTOR_FOR_INORDER_I(i, u32) {
1099        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1100    }
1101}
1102
1103void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1104                     ppc_avr_t *b, ppc_avr_t *c)
1105{
1106    uint32_t prod[8];
1107    int i;
1108    int sat = 0;
1109
1110    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1111        prod[i] = a->u16[i] * b->u16[i];
1112    }
1113
1114    VECTOR_FOR_INORDER_I(i, s32) {
1115        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1116
1117        r->u32[i] = cvtuduw(t, &sat);
1118    }
1119
1120    if (sat) {
1121        env->vscr |= (1 << VSCR_SAT);
1122    }
1123}
1124
/*
 * VMUL_DO - widening multiply of the even (evenp non-zero) or odd
 * element pairs; HI_IDX/LO_IDX adjust which union slot holds the
 * architected even/odd element on this host's byte order.
 *   mul_element  - narrow source element field
 *   prod_element - double-width destination element field
 *   cast         - C type wide enough for the full product
 */
#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + HI_IDX] *              \
                    (cast)b->mul_element[i * 2 + HI_IDX];               \
            } else {                                                    \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + LO_IDX] *              \
                    (cast)b->mul_element[i * 2 + LO_IDX];               \
            }                                                           \
        }                                                               \
    }
/* Expand the multiply-even (mule) and multiply-odd (mulo) helpers. */
#define VMUL(suffix, mul_element, prod_element, cast)            \
    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)    \
    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
VMUL(sb, s8, s16, int16_t)
VMUL(sh, s16, s32, int32_t)
VMUL(sw, s32, s64, int64_t)
VMUL(ub, u8, u16, uint16_t)
VMUL(uh, u16, u32, uint32_t)
VMUL(uw, u32, u64, uint64_t)
#undef VMUL_DO
#undef VMUL
1153
/*
 * vperm: Vector Permute.  Each destination byte is selected from the
 * 32-byte concatenation of a (selector 0-15) and b (selector 16-31) by
 * the low 5 bits of the corresponding control byte in c.
 */
void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        /* The u8[] union member is stored reversed on LE hosts. */
        int index = 15 - (s & 0xf);
#endif

        /* Selector bit 4 picks the second source vector. */
        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    /* Build into a temporary so r may alias a, b or c. */
    *r = result;
}
1176
/*
 * vpermr: Vector Permute Right-indexed.  As vperm, but byte indices are
 * taken from the opposite end and the roles of a and b are swapped for
 * selector bit 4.
 */
void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        /* Right-indexed: mirror of the vperm index on the same host. */
        int index = 15 - (s & 0xf);
#else
        int index = s & 0xf;
#endif

        /* Selector bit 4 picks the first source vector (inverse of vperm). */
        if (s & 0x10) {
            result.u8[i] = a->u8[index];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
    /* Build into a temporary so r may alias a, b or c. */
    *r = result;
}
1199
/*
 * Host-endian access helpers for the bit-permute helpers below:
 *   VBPERMQ_INDEX - fetch control byte i of a vector in PPC element order
 *   VBPERMD_INDEX - map an architected doubleword index to the union slot
 *   VBPERMQ_DW    - select which u64 slot holds architected bit 'index'
 *   EXTRACT_BIT   - extract architected bit 'index' from doubleword i
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif
1212
/*
 * vbpermd: Vector Bit Permute Doubleword.  For each doubleword of a,
 * gathers the eight bits selected by the index bytes of b (indices of
 * 64 or more contribute 0) into the low byte of the corresponding
 * result doubleword, first index in the MSB.
 */
void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    /* Temporary keeps the helper safe when r aliases a or b. */
    *r = result;
}
1227
/*
 * vbpermq: Vector Bit Permute Quadword.  Gathers 16 bits of the 128-bit
 * value in a, selected by the low 7 bits of each control byte in b, into
 * the high halfword of the result; out-of-range indices (>= 128)
 * contribute 0.  The rest of the result is cleared.
 */
void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63-(index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->u64[HI_IDX] = perm;
    r->u64[LO_IDX] = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW
/*
 * NOTE(review): VBPERMD_INDEX and EXTRACT_BIT, defined alongside the two
 * macros above, are not #undef'd here — confirm nothing later in the
 * file relies on them before tidying.
 */
1250
1251static const uint64_t VGBBD_MASKS[256] = {
1252    0x0000000000000000ull, /* 00 */
1253    0x0000000000000080ull, /* 01 */
1254    0x0000000000008000ull, /* 02 */
1255    0x0000000000008080ull, /* 03 */
1256    0x0000000000800000ull, /* 04 */
1257    0x0000000000800080ull, /* 05 */
1258    0x0000000000808000ull, /* 06 */
1259    0x0000000000808080ull, /* 07 */
1260    0x0000000080000000ull, /* 08 */
1261    0x0000000080000080ull, /* 09 */
1262    0x0000000080008000ull, /* 0A */
1263    0x0000000080008080ull, /* 0B */
1264    0x0000000080800000ull, /* 0C */
1265    0x0000000080800080ull, /* 0D */
1266    0x0000000080808000ull, /* 0E */
1267    0x0000000080808080ull, /* 0F */
1268    0x0000008000000000ull, /* 10 */
1269    0x0000008000000080ull, /* 11 */
1270    0x0000008000008000ull, /* 12 */
1271    0x0000008000008080ull, /* 13 */
1272    0x0000008000800000ull, /* 14 */
1273    0x0000008000800080ull, /* 15 */
1274    0x0000008000808000ull, /* 16 */
1275    0x0000008000808080ull, /* 17 */
1276    0x0000008080000000ull, /* 18 */
1277    0x0000008080000080ull, /* 19 */
1278    0x0000008080008000ull, /* 1A */
1279    0x0000008080008080ull, /* 1B */
1280    0x0000008080800000ull, /* 1C */
1281    0x0000008080800080ull, /* 1D */
1282    0x0000008080808000ull, /* 1E */
1283    0x0000008080808080ull, /* 1F */
1284    0x0000800000000000ull, /* 20 */
1285    0x0000800000000080ull, /* 21 */
1286    0x0000800000008000ull, /* 22 */
1287    0x0000800000008080ull, /* 23 */
1288    0x0000800000800000ull, /* 24 */
1289    0x0000800000800080ull, /* 25 */
1290    0x0000800000808000ull, /* 26 */
1291    0x0000800000808080ull, /* 27 */
1292    0x0000800080000000ull, /* 28 */
1293    0x0000800080000080ull, /* 29 */
1294    0x0000800080008000ull, /* 2A */
1295    0x0000800080008080ull, /* 2B */
1296    0x0000800080800000ull, /* 2C */
1297    0x0000800080800080ull, /* 2D */
1298    0x0000800080808000ull, /* 2E */
1299    0x0000800080808080ull, /* 2F */
1300    0x0000808000000000ull, /* 30 */
1301    0x0000808000000080ull, /* 31 */
1302    0x0000808000008000ull, /* 32 */
1303    0x0000808000008080ull, /* 33 */
1304    0x0000808000800000ull, /* 34 */
1305    0x0000808000800080ull, /* 35 */
1306    0x0000808000808000ull, /* 36 */
1307    0x0000808000808080ull, /* 37 */
1308    0x0000808080000000ull, /* 38 */
1309    0x0000808080000080ull, /* 39 */
1310    0x0000808080008000ull, /* 3A */
1311    0x0000808080008080ull, /* 3B */
1312    0x0000808080800000ull, /* 3C */
1313    0x0000808080800080ull, /* 3D */
1314    0x0000808080808000ull, /* 3E */
1315    0x0000808080808080ull, /* 3F */
1316    0x0080000000000000ull, /* 40 */
1317    0x0080000000000080ull, /* 41 */
1318    0x0080000000008000ull, /* 42 */
1319    0x0080000000008080ull, /* 43 */
1320    0x0080000000800000ull, /* 44 */
1321    0x0080000000800080ull, /* 45 */
1322    0x0080000000808000ull, /* 46 */
1323    0x0080000000808080ull, /* 47 */
1324    0x0080000080000000ull, /* 48 */
1325    0x0080000080000080ull, /* 49 */
1326    0x0080000080008000ull, /* 4A */
1327    0x0080000080008080ull, /* 4B */
1328    0x0080000080800000ull, /* 4C */
1329    0x0080000080800080ull, /* 4D */
1330    0x0080000080808000ull, /* 4E */
1331    0x0080000080808080ull, /* 4F */
1332    0x0080008000000000ull, /* 50 */
1333    0x0080008000000080ull, /* 51 */
1334    0x0080008000008000ull, /* 52 */
1335    0x0080008000008080ull, /* 53 */
1336    0x0080008000800000ull, /* 54 */
1337    0x0080008000800080ull, /* 55 */
1338    0x0080008000808000ull, /* 56 */
1339    0x0080008000808080ull, /* 57 */
1340    0x0080008080000000ull, /* 58 */
1341    0x0080008080000080ull, /* 59 */
1342    0x0080008080008000ull, /* 5A */
1343    0x0080008080008080ull, /* 5B */
1344    0x0080008080800000ull, /* 5C */
1345    0x0080008080800080ull, /* 5D */
1346    0x0080008080808000ull, /* 5E */
1347    0x0080008080808080ull, /* 5F */
1348    0x0080800000000000ull, /* 60 */
1349    0x0080800000000080ull, /* 61 */
1350    0x0080800000008000ull, /* 62 */
1351    0x0080800000008080ull, /* 63 */
1352    0x0080800000800000ull, /* 64 */
1353    0x0080800000800080ull, /* 65 */
1354    0x0080800000808000ull, /* 66 */
1355    0x0080800000808080ull, /* 67 */
1356    0x0080800080000000ull, /* 68 */
1357    0x0080800080000080ull, /* 69 */
1358    0x0080800080008000ull, /* 6A */
1359    0x0080800080008080ull, /* 6B */
1360    0x0080800080800000ull, /* 6C */
1361    0x0080800080800080ull, /* 6D */
1362    0x0080800080808000ull, /* 6E */
1363    0x0080800080808080ull, /* 6F */
1364    0x0080808000000000ull, /* 70 */
1365    0x0080808000000080ull, /* 71 */
1366    0x0080808000008000ull, /* 72 */
1367    0x0080808000008080ull, /* 73 */
1368    0x0080808000800000ull, /* 74 */
1369    0x0080808000800080ull, /* 75 */
1370    0x0080808000808000ull, /* 76 */
1371    0x0080808000808080ull, /* 77 */
1372    0x0080808080000000ull, /* 78 */
1373    0x0080808080000080ull, /* 79 */
1374    0x0080808080008000ull, /* 7A */
1375    0x0080808080008080ull, /* 7B */
1376    0x0080808080800000ull, /* 7C */
1377    0x0080808080800080ull, /* 7D */
1378    0x0080808080808000ull, /* 7E */
1379    0x0080808080808080ull, /* 7F */
1380    0x8000000000000000ull, /* 80 */
1381    0x8000000000000080ull, /* 81 */
1382    0x8000000000008000ull, /* 82 */
1383    0x8000000000008080ull, /* 83 */
1384    0x8000000000800000ull, /* 84 */
1385    0x8000000000800080ull, /* 85 */
1386    0x8000000000808000ull, /* 86 */
1387    0x8000000000808080ull, /* 87 */
1388    0x8000000080000000ull, /* 88 */
1389    0x8000000080000080ull, /* 89 */
1390    0x8000000080008000ull, /* 8A */
1391    0x8000000080008080ull, /* 8B */
1392    0x8000000080800000ull, /* 8C */
1393    0x8000000080800080ull, /* 8D */
1394    0x8000000080808000ull, /* 8E */
1395    0x8000000080808080ull, /* 8F */
1396    0x8000008000000000ull, /* 90 */
1397    0x8000008000000080ull, /* 91 */
1398    0x8000008000008000ull, /* 92 */
1399    0x8000008000008080ull, /* 93 */
1400    0x8000008000800000ull, /* 94 */
1401    0x8000008000800080ull, /* 95 */
1402    0x8000008000808000ull, /* 96 */
1403    0x8000008000808080ull, /* 97 */
1404    0x8000008080000000ull, /* 98 */
1405    0x8000008080000080ull, /* 99 */
1406    0x8000008080008000ull, /* 9A */
1407    0x8000008080008080ull, /* 9B */
1408    0x8000008080800000ull, /* 9C */
1409    0x8000008080800080ull, /* 9D */
1410    0x8000008080808000ull, /* 9E */
1411    0x8000008080808080ull, /* 9F */
1412    0x8000800000000000ull, /* A0 */
1413    0x8000800000000080ull, /* A1 */
1414    0x8000800000008000ull, /* A2 */
1415    0x8000800000008080ull, /* A3 */
1416    0x8000800000800000ull, /* A4 */
1417    0x8000800000800080ull, /* A5 */
1418    0x8000800000808000ull, /* A6 */
1419    0x8000800000808080ull, /* A7 */
1420    0x8000800080000000ull, /* A8 */
1421    0x8000800080000080ull, /* A9 */
1422    0x8000800080008000ull, /* AA */
1423    0x8000800080008080ull, /* AB */
1424    0x8000800080800000ull, /* AC */
1425    0x8000800080800080ull, /* AD */
1426    0x8000800080808000ull, /* AE */
1427    0x8000800080808080ull, /* AF */
1428    0x8000808000000000ull, /* B0 */
1429    0x8000808000000080ull, /* B1 */
1430    0x8000808000008000ull, /* B2 */
1431    0x8000808000008080ull, /* B3 */
1432    0x8000808000800000ull, /* B4 */
1433    0x8000808000800080ull, /* B5 */
1434    0x8000808000808000ull, /* B6 */
1435    0x8000808000808080ull, /* B7 */
1436    0x8000808080000000ull, /* B8 */
1437    0x8000808080000080ull, /* B9 */
1438    0x8000808080008000ull, /* BA */
1439    0x8000808080008080ull, /* BB */
1440    0x8000808080800000ull, /* BC */
1441    0x8000808080800080ull, /* BD */
1442    0x8000808080808000ull, /* BE */
1443    0x8000808080808080ull, /* BF */
1444    0x8080000000000000ull, /* C0 */
1445    0x8080000000000080ull, /* C1 */
1446    0x8080000000008000ull, /* C2 */
1447    0x8080000000008080ull, /* C3 */
1448    0x8080000000800000ull, /* C4 */
1449    0x8080000000800080ull, /* C5 */
1450    0x8080000000808000ull, /* C6 */
1451    0x8080000000808080ull, /* C7 */
1452    0x8080000080000000ull, /* C8 */
1453    0x8080000080000080ull, /* C9 */
1454    0x8080000080008000ull, /* CA */
1455    0x8080000080008080ull, /* CB */
1456    0x8080000080800000ull, /* CC */
1457    0x8080000080800080ull, /* CD */
1458    0x8080000080808000ull, /* CE */
1459    0x8080000080808080ull, /* CF */
1460    0x8080008000000000ull, /* D0 */
1461    0x8080008000000080ull, /* D1 */
1462    0x8080008000008000ull, /* D2 */
1463    0x8080008000008080ull, /* D3 */
1464    0x8080008000800000ull, /* D4 */
1465    0x8080008000800080ull, /* D5 */
1466    0x8080008000808000ull, /* D6 */
1467    0x8080008000808080ull, /* D7 */
1468    0x8080008080000000ull, /* D8 */
1469    0x8080008080000080ull, /* D9 */
1470    0x8080008080008000ull, /* DA */
1471    0x8080008080008080ull, /* DB */
1472    0x8080008080800000ull, /* DC */
1473    0x8080008080800080ull, /* DD */
1474    0x8080008080808000ull, /* DE */
1475    0x8080008080808080ull, /* DF */
1476    0x8080800000000000ull, /* E0 */
1477    0x8080800000000080ull, /* E1 */
1478    0x8080800000008000ull, /* E2 */
1479    0x8080800000008080ull, /* E3 */
1480    0x8080800000800000ull, /* E4 */
1481    0x8080800000800080ull, /* E5 */
1482    0x8080800000808000ull, /* E6 */
1483    0x8080800000808080ull, /* E7 */
1484    0x8080800080000000ull, /* E8 */
1485    0x8080800080000080ull, /* E9 */
1486    0x8080800080008000ull, /* EA */
1487    0x8080800080008080ull, /* EB */
1488    0x8080800080800000ull, /* EC */
1489    0x8080800080800080ull, /* ED */
1490    0x8080800080808000ull, /* EE */
1491    0x8080800080808080ull, /* EF */
1492    0x8080808000000000ull, /* F0 */
1493    0x8080808000000080ull, /* F1 */
1494    0x8080808000008000ull, /* F2 */
1495    0x8080808000008080ull, /* F3 */
1496    0x8080808000800000ull, /* F4 */
1497    0x8080808000800080ull, /* F5 */
1498    0x8080808000808000ull, /* F6 */
1499    0x8080808000808080ull, /* F7 */
1500    0x8080808080000000ull, /* F8 */
1501    0x8080808080000080ull, /* F9 */
1502    0x8080808080008000ull, /* FA */
1503    0x8080808080008080ull, /* FB */
1504    0x8080808080800000ull, /* FC */
1505    0x8080808080800080ull, /* FD */
1506    0x8080808080808000ull, /* FE */
1507    0x8080808080808080ull, /* FF */
1508};
1509
1510void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1511{
1512    int i;
1513    uint64_t t[2] = { 0, 0 };
1514
1515    VECTOR_FOR_INORDER_I(i, u8) {
1516#if defined(HOST_WORDS_BIGENDIAN)
1517        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1518#else
1519        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1520#endif
1521    }
1522
1523    r->u64[0] = t[0];
1524    r->u64[1] = t[1];
1525}
1526
/*
 * PMSUM - polynomial (carry-less, GF(2)) multiply-sum.
 * Each double-width target element is the XOR of the two carry-less
 * source-element products that occupy it.
 *   srcfld/trgfld - source and (double-width) target element fields
 *   trgtyp        - C type of the target element
 */
#define PMSUM(name, srcfld, trgfld, trgtyp)                   \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
{                                                             \
    int i, j;                                                 \
    trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])];      \
                                                              \
    VECTOR_FOR_INORDER_I(i, srcfld) {                         \
        prod[i] = 0;                                          \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
            if (a->srcfld[i] & (1ull<<j)) {                   \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
            }                                                 \
        }                                                     \
    }                                                         \
                                                              \
    VECTOR_FOR_INORDER_I(i, trgfld) {                         \
        r->trgfld[i] = prod[2*i] ^ prod[2*i+1];               \
    }                                                         \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)
1550
/*
 * vpmsumd: Vector Polynomial Multiply-Sum Doubleword.  The result is
 * the XOR of the two 128-bit carry-less products a->u64[i] * b->u64[i].
 * With compiler __int128 support the accumulation is done directly;
 * otherwise the 128-bit left shift of b is emulated with a pair of
 * 64-bit halves.
 */
void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull<<j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull<<j)) {
                ppc_avr_t bshift;
                /* bshift = (128-bit) b->u64[i] << j, split into halves. */
                if (j == 0) {
                    bshift.u64[HI_IDX] = 0;
                    bshift.u64[LO_IDX] = b->u64[i];
                } else {
                    bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
                    bshift.u64[LO_IDX] = b->u64[i] << j;
                }
                prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
                prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
            }
        }
    }

    r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
    r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
#endif
}
1595
1596
/* PKBIG orders the two pack sources to match architected layout. */
#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
/*
 * vpkpx: Vector Pack Pixel.  Packs the eight 32-bit pixels of a and b
 * into eight 16-bit pixels, extracting a bit field from each source
 * byte lane (see the ISA for the exact 16-bit pixel layout).
 */
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
                                 ((e >> 6) & 0x3e0) |
                                 ((e >> 3) & 0x1f));
        }
    }
    /* Build into a temporary so r may alias a or b. */
    *r = result;
}
1623
/*
 * VPK - Vector Pack.
 *   from/to - source and (half-width) destination element fields
 *   cvt     - narrowing conversion, possibly saturating; I() below is
 *             the plain truncating identity used by the modulo forms
 *   dosat   - non-zero when saturation must be reported in VSCR[SAT]
 * PKBIG orders the two source vectors so the architected high half is
 * packed first regardless of host byte order; the temporary allows r to
 * alias a or b.
 */
#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG
1659
1660void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1661{
1662    int i;
1663
1664    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1665        r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1666    }
1667}
1668
/*
 * VRFI: round each single-precision element to an integral value with
 * the given rounding mode.  A local copy of vec_status is used so the
 * temporary rounding-mode change does not leak back into env.
 */
#define VRFI(suffix, rounding)                                  \
    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
                             ppc_avr_t *b)                      \
    {                                                           \
        int i;                                                  \
        float_status s = env->vec_status;                       \
                                                                \
        set_float_rounding_mode(rounding, &s);                  \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
            r->f[i] = float32_round_to_int (b->f[i], &s);       \
        }                                                       \
    }
VRFI(n, float_round_nearest_even)
VRFI(m, float_round_down)
VRFI(p, float_round_up)
VRFI(z, float_round_to_zero)
#undef VRFI
1686
1687#define VROTATE(suffix, element, mask)                                  \
1688    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1689    {                                                                   \
1690        int i;                                                          \
1691                                                                        \
1692        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1693            unsigned int shift = b->element[i] & mask;                  \
1694            r->element[i] = (a->element[i] << shift) |                  \
1695                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1696        }                                                               \
1697    }
1698VROTATE(b, u8, 0x7)
1699VROTATE(h, u16, 0xF)
1700VROTATE(w, u32, 0x1F)
1701VROTATE(d, u64, 0x3F)
1702#undef VROTATE
1703
1704void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1705{
1706    int i;
1707
1708    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1709        float32 t = float32_sqrt(b->f[i], &env->vec_status);
1710
1711        r->f[i] = float32_div(float32_one, t, &env->vec_status);
1712    }
1713}
1714
/*
 * VRLMI: vector rotate left then mask (vrldnm/vrlwnm) or mask-insert
 * (vrldmi/vrlwmi).  For each element, bit fields of the corresponding
 * element of b supply the rotate count (bits 0-5), the mask end
 * (bits 8-13) and the mask begin (bits 16-21).  The source is rotated
 * and ANDed with the begin..end mask; with "insert" set, bits outside
 * the mask are preserved from the destination register.
 */
#define VRLMI(name, size, element, insert)                            \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)          \
{                                                                     \
    int i;                                                            \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
        uint##size##_t src1 = a->element[i];                          \
        uint##size##_t src2 = b->element[i];                          \
        uint##size##_t src3 = r->element[i];                          \
        uint##size##_t begin, end, shift, mask, rot_val;              \
                                                                      \
        shift = extract##size(src2, 0, 6);                            \
        end   = extract##size(src2, 8, 6);                            \
        begin = extract##size(src2, 16, 6);                           \
        rot_val = rol##size(src1, shift);                             \
        mask = mask_u##size(begin, end);                              \
        if (insert) {                                                 \
            r->element[i] = (rot_val & mask) | (src3 & ~mask);        \
        } else {                                                      \
            r->element[i] = (rot_val & mask);                         \
        }                                                             \
    }                                                                 \
}

VRLMI(vrldmi, 64, u64, 1);
VRLMI(vrlwmi, 32, u32, 1);
VRLMI(vrldnm, 64, u64, 0);
VRLMI(vrlwnm, 32, u32, 0);
1742
1743void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1744                 ppc_avr_t *c)
1745{
1746    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1747    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1748}
1749
1750void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1751{
1752    int i;
1753
1754    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1755        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1756    }
1757}
1758
1759void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1760{
1761    int i;
1762
1763    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1764        r->f[i] = float32_log2(b->f[i], &env->vec_status);
1765    }
1766}
1767
/*
 * VEXTU_X_DO: vextub[lr]x / vextuh[lr]x / vextuw[lr]x.  Extract an
 * unsigned element of "size" bits from vector b into a GPR, at byte
 * offset (a & 0xf) counted from the left ("left" set) or right end of
 * the register.  The two host-endian variants differ only in how the
 * byte offset maps onto the 128-bit shift count.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTU_X_DO(name, size, left)                                \
    target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
    {                                                               \
        int index;                                                  \
        if (left) {                                                 \
            index = (a & 0xf) * 8;                                  \
        } else {                                                    \
            index = ((15 - (a & 0xf) + 1) * 8) - size;              \
        }                                                           \
        return int128_getlo(int128_rshift(b->s128, index)) &        \
            MAKE_64BIT_MASK(0, size);                               \
    }
#else
#define VEXTU_X_DO(name, size, left)                                \
    target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
    {                                                               \
        int index;                                                  \
        if (left) {                                                 \
            index = ((15 - (a & 0xf) + 1) * 8) - size;              \
        } else {                                                    \
            index = (a & 0xf) * 8;                                  \
        }                                                           \
        return int128_getlo(int128_rshift(b->s128, index)) &        \
            MAKE_64BIT_MASK(0, size);                               \
    }
#endif

VEXTU_X_DO(vextublx,  8, 1)
VEXTU_X_DO(vextuhlx, 16, 1)
VEXTU_X_DO(vextuwlx, 32, 1)
VEXTU_X_DO(vextubrx,  8, 0)
VEXTU_X_DO(vextuhrx, 16, 0)
VEXTU_X_DO(vextuwrx, 32, 0)
#undef VEXTU_X_DO
1803
/* The specification says that the results are undefined if all of the
 * shift counts are not identical.  We check to make sure that they are
 * to conform to what real hardware appears to do.  */
/*
 * VSHIFT: vsl/vsr, whole-register shift by 0-7 bits.  The count is
 * read from the least-significant byte (b->u8[LO_IDX*15]); if any
 * byte disagrees, r is left unmodified.  The shifted-out bits of one
 * 64-bit half are carried into the other.
 */
#define VSHIFT(suffix, leftp)                                           \
    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
    {                                                                   \
        int shift = b->u8[LO_IDX*15] & 0x7;                             \
        int doit = 1;                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
        }                                                               \
        if (doit) {                                                     \
            if (shift == 0) {                                           \
                *r = *a;                                                \
            } else if (leftp) {                                         \
                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
                                                                        \
                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
            } else {                                                    \
                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
                                                                        \
                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
            }                                                           \
        }                                                               \
    }
VSHIFT(l, 1)
VSHIFT(r, 0)
#undef VSHIFT
1836
/*
 * VSL: per-element logical shift left; each element of a is shifted
 * by the low bits of the corresponding element of b.  The mask keeps
 * the count strictly below the element width, so the shift is always
 * well defined.
 */
#define VSL(suffix, element, mask)                                      \
    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
                                                                        \
            r->element[i] = a->element[i] << shift;                     \
        }                                                               \
    }
VSL(b, u8, 0x7)
VSL(h, u16, 0x0F)
VSL(w, u32, 0x1F)
VSL(d, u64, 0x3F)
#undef VSL
1853
/*
 * vslv: shift-left variable.  Each result byte is taken from the
 * 16-bit window formed by byte i and byte i+1 of a, shifted left by
 * 0-7 bits.  Forward iteration is safe even when r aliases a: step i
 * only writes index i and never re-reads an index already written.
 */
void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes, size;

    size = ARRAY_SIZE(r->u8);
    for (i = 0; i < size; i++) {
        shift = b->u8[i] & 0x7;             /* extract shift value */
        bytes = (a->u8[i] << 8) +             /* extract adjacent bytes */
            (((i + 1) < size) ? a->u8[i + 1] : 0);
        r->u8[i] = (bytes << shift) >> 8;   /* shift and store result */
    }
}
1867
/*
 * vsrv: shift-right variable.  Each result byte is taken from the
 * 16-bit window formed by byte i-1 and byte i of a, shifted right by
 * 0-7 bits.
 */
void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    unsigned int shift, bytes;

    /* Use reverse order, as destination and source register can be same. Its
     * being modified in place saving temporary, reverse order will guarantee
     * that computed result is not fed back.
     */
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        shift = b->u8[i] & 0x7;                 /* extract shift value */
        bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
                                                /* extract adjacent bytes */
        r->u8[i] = (bytes >> shift) & 0xFF;     /* shift and store result */
    }
}
1884
/*
 * vsldoi: shift-left double by octet immediate.  The result is the
 * 16 bytes starting "sh" bytes into the 32-byte concatenation a:b
 * (in target order); the little-endian host variant mirrors the
 * indexing.  A temporary is used so r may alias a or b.
 */
void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
{
    int sh = shift & 0xf;
    int i;
    ppc_avr_t result;

#if defined(HOST_WORDS_BIGENDIAN)
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = sh + i;
        if (index > 0xf) {
            result.u8[i] = b->u8[index - 0x10];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int index = (16 - sh) + i;
        if (index > 0xf) {
            result.u8[i] = a->u8[index - 0x10];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
#endif
    *r = result;
}
1912
/*
 * vslo: shift left by octet.  The byte count (0-15) comes from bits
 * 3-6 of the least-significant byte of b.  memmove is used because
 * r may alias a.
 */
void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16-sh], 0, sh);
#else
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#endif
}
1925
/* Experimental testing shows that hardware masks the immediate.  */
#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
#if defined(HOST_WORDS_BIGENDIAN)
#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
#else
/* Mirror the target element index into host (little-endian) order. */
#define SPLAT_ELEMENT(element)                                  \
    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
#endif
/*
 * VSPLT: splat — copy the element of b selected by the (masked)
 * immediate into every element of r.
 */
#define VSPLT(suffix, element)                                          \
    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
    {                                                                   \
        uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = s;                                          \
        }                                                               \
    }
VSPLT(b, u8)
VSPLT(h, u16)
VSPLT(w, u32)
#undef VSPLT
#undef SPLAT_ELEMENT
#undef _SPLAT_MASKED
/*
 * VINSERT: vinsertb/h/w/d.  Insert one element taken from the middle
 * of b (at byte 8 minus the element size, target order) into r at the
 * byte offset given by the immediate; the little-endian host variant
 * mirrors both positions.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])],           \
               sizeof(r->element[0]));                                      \
    }
#else
#define VINSERT(suffix, element)                                            \
    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                       \
        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
    }
#endif
VINSERT(b, u8)
VINSERT(h, u16)
VINSERT(w, u32)
VINSERT(d, u64)
#undef VINSERT
/*
 * VEXTRACT: vextractub/uh/uw/d.  Extract one element of b at the byte
 * offset given by the immediate and place it zero-extended in the
 * middle of r (ending at byte 8, target order); all other bytes of r
 * are cleared.  The little-endian variant mirrors the positions.
 */
#if defined(HOST_WORDS_BIGENDIAN)
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
        memset(&r->u8[8], 0, 8);                                             \
        memset(&r->u8[0], 0, 8 - es);                                        \
    }
#else
#define VEXTRACT(suffix, element)                                            \
    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
    {                                                                        \
        uint32_t es = sizeof(r->element[0]);                                 \
        uint32_t s = (16 - index) - es;                                      \
        memmove(&r->u8[8], &b->u8[s], es);                                   \
        memset(&r->u8[0], 0, 8);                                             \
        memset(&r->u8[8 + es], 0, 8 - es);                                   \
    }
#endif
VEXTRACT(ub, u8)
VEXTRACT(uh, u16)
VEXTRACT(uw, u32)
VEXTRACT(d, u64)
#undef VEXTRACT
1995
/*
 * xxextractuw: extract an unsigned word from VSR xbn at byte offset
 * "index" and place it zero-extended in the word ending at byte 8 of
 * VSR xtn.  The byte copy wraps modulo 16 for offsets near the end of
 * the register; the LE branch walks the mirrored indices.
 */
void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
                        target_ulong xbn, uint32_t index)
{
    ppc_vsr_t xt, xb;
    size_t es = sizeof(uint32_t);
    uint32_t ext_index;
    int i;

    getVSR(xbn, &xb, env);
    /* All bytes of the result not covered by the copy stay zero. */
    memset(&xt, 0, sizeof(xt));

#if defined(HOST_WORDS_BIGENDIAN)
    ext_index = index;
    for (i = 0; i < es; i++, ext_index++) {
        xt.u8[8 - es + i] = xb.u8[ext_index % 16];
    }
#else
    ext_index = 15 - index;
    for (i = es - 1; i >= 0; i--, ext_index--) {
        xt.u8[8 + i] = xb.u8[ext_index % 16];
    }
#endif

    putVSR(xtn, &xt, env);
}
2021
/*
 * xxinsertw: insert the word held at bytes [8-es, 8) of VSR xbn into
 * VSR xtn at byte offset "index", leaving the other bytes of xtn
 * unchanged.  The loop bounds clamp the copy at the register edge
 * rather than wrapping.
 */
void helper_xxinsertw(CPUPPCState *env, target_ulong xtn,
                      target_ulong xbn, uint32_t index)
{
    ppc_vsr_t xt, xb;
    size_t es = sizeof(uint32_t);
    int ins_index, i = 0;

    getVSR(xbn, &xb, env);
    /* Read-modify-write: start from the current destination value. */
    getVSR(xtn, &xt, env);

#if defined(HOST_WORDS_BIGENDIAN)
    ins_index = index;
    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
        xt.u8[ins_index] = xb.u8[8 - es + i];
    }
#else
    ins_index = 15 - index;
    for (i = es - 1; i >= 0 && ins_index >= 0; i--, ins_index--) {
        xt.u8[ins_index] = xb.u8[8 + i];
    }
#endif

    putVSR(xtn, &xt, env);
}
2046
/*
 * VEXT_SIGNED: vextsb2w etc.  Sign-extend the low "cast"-sized part
 * of each element in place: mask off the low bits, reinterpret them
 * as the narrow signed type, then widen back to the element type.
 */
#define VEXT_SIGNED(name, element, mask, cast, recast)              \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    VECTOR_FOR_INORDER_I(i, element) {                              \
        r->element[i] = (recast)((cast)(b->element[i] & mask));     \
    }                                                               \
}
VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
#undef VEXT_SIGNED
2061
/*
 * VNEG: per-element two's-complement negate.  NOTE(review): negating
 * the most-negative value overflows a signed type; QEMU builds with
 * -fwrapv, which makes this well defined — confirm before reusing
 * this code elsewhere.
 */
#define VNEG(name, element)                                         \
void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
{                                                                   \
    int i;                                                          \
    VECTOR_FOR_INORDER_I(i, element) {                              \
        r->element[i] = -b->element[i];                             \
    }                                                               \
}
VNEG(vnegw, s32)
VNEG(vnegd, s64)
#undef VNEG
2073
/*
 * VSPLTI: splat immediate — sign-extend the 5-bit immediate (via the
 * shift-up/arithmetic-shift-down trick on an int8_t) and copy it to
 * every element of r.
 */
#define VSPLTI(suffix, element, splat_type)                     \
    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
    {                                                           \
        splat_type x = (int8_t)(splat << 3) >> 3;               \
        int i;                                                  \
                                                                \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
            r->element[i] = x;                                  \
        }                                                       \
    }
VSPLTI(b, s8, int8_t)
VSPLTI(h, s16, int16_t)
VSPLTI(w, s32, int32_t)
#undef VSPLTI
2088
/*
 * VSR: per-element shift right.  The signed-element instantiations
 * (vsrab..vsrad) rely on >> of a negative value being an arithmetic
 * shift, which QEMU assumes of its supported compilers; the unsigned
 * ones (vsrb..vsrd) are logical shifts.  The mask keeps the count
 * below the element width.
 */
#define VSR(suffix, element, mask)                                      \
    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            unsigned int shift = b->element[i] & mask;                  \
            r->element[i] = a->element[i] >> shift;                     \
        }                                                               \
    }
VSR(ab, s8, 0x7)
VSR(ah, s16, 0xF)
VSR(aw, s32, 0x1F)
VSR(ad, s64, 0x3F)
VSR(b, u8, 0x7)
VSR(h, u16, 0xF)
VSR(w, u32, 0x1F)
VSR(d, u64, 0x3F)
#undef VSR
2108
/*
 * vsro: shift right by octet.  The byte count (0-15) comes from bits
 * 3-6 of the least-significant byte of b.  memmove is used because
 * r may alias a.
 */
void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;

#if defined(HOST_WORDS_BIGENDIAN)
    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
    memset(&r->u8[0], 0, sh);
#else
    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
    memset(&r->u8[16 - sh], 0, sh);
#endif
}
2121
2122void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2123{
2124    int i;
2125
2126    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2127        r->u32[i] = a->u32[i] >= b->u32[i];
2128    }
2129}
2130
/*
 * vsumsws: sum the four signed words of a with the last word of b,
 * saturate to 32 bits, and place the result in the last word of r
 * (other words cleared).  "upper" maps the target's last word onto
 * the host element order.  Sets VSCR[SAT] on saturation.
 */
void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int64_t t;
    int i, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = ARRAY_SIZE(r->s32)-1;
#else
    upper = 0;
#endif
    t = (int64_t)b->s32[upper];
    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        t += a->s32[i];
        result.s32[i] = 0;
    }
    result.s32[upper] = cvtsdsw(t, &sat);
    *r = result;

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
2155
/*
 * vsum2sws: for each 64-bit half, sum the two signed words of a in
 * that half with the odd word of b, saturate to 32 bits, and store
 * the result in the odd word of r (even words cleared).  Sets
 * VSCR[SAT] on saturation.
 */
void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j, upper;
    ppc_avr_t result;
    int sat = 0;

#if defined(HOST_WORDS_BIGENDIAN)
    upper = 1;
#else
    upper = 0;
#endif
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        int64_t t = (int64_t)b->s32[upper + i * 2];

        result.u64[i] = 0;
        /* ARRAY_SIZE(r->u64) == 2: add the two words of this half. */
        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
            t += a->s32[2 * i + j];
        }
        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
    }

    *r = result;
    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
2182
/*
 * vsum4sbs: for each word, sum the four signed bytes of a in that
 * word with the signed word of b, saturate to 32 bits, and store in
 * r.  Sets VSCR[SAT] on saturation.
 */
void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        /* ARRAY_SIZE(r->s32) == 4: the four bytes of word i. */
        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
            t += a->s8[4 * i + j];
        }
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
2201
/*
 * vsum4shs: for each word, sum the two signed halfwords of a in that
 * word with the signed word of b, saturate to 32 bits, and store in
 * r.  Sets VSCR[SAT] on saturation.
 */
void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
        int64_t t = (int64_t)b->s32[i];

        t += a->s16[2 * i] + a->s16[2 * i + 1];
        r->s32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
2218
/*
 * vsum4ubs: for each word, sum the four unsigned bytes of a in that
 * word with the unsigned word of b, saturate to 32 bits unsigned, and
 * store in r.  Sets VSCR[SAT] on saturation.
 */
void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t t = (uint64_t)b->u32[i];

        /* ARRAY_SIZE(r->u32) == 4: the four bytes of word i. */
        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
            t += a->u8[4 * i + j];
        }
        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}
2237
/* UPKHI/UPKLO select which half of the packed source an unpack-high
 * or unpack-low helper reads, adjusted for host element order. */
#if defined(HOST_WORDS_BIGENDIAN)
#define UPKHI 1
#define UPKLO 0
#else
#define UPKHI 0
#define UPKLO 1
#endif
/*
 * VUPKPX: unpack 1/5/5/5 pixels to 8/8/8/8, replicating the a-bit to
 * a full byte.  NB: the channel locals r and b shadow the function
 * parameters inside the loop; the result is built in a temporary and
 * stored through the (unshadowed) r afterwards.
 */
#define VUPKPX(suffix, hi)                                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
            uint16_t e = b->u16[hi ? i : i+4];                          \
            uint8_t a = (e >> 15) ? 0xff : 0;                           \
            uint8_t r = (e >> 10) & 0x1f;                               \
            uint8_t g = (e >> 5) & 0x1f;                                \
            uint8_t b = e & 0x1f;                                       \
                                                                        \
            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
        }                                                               \
        *r = result;                                                    \
    }
VUPKPX(lpx, UPKLO)
VUPKPX(hpx, UPKHI)
#undef VUPKPX
2265
/*
 * VUPK: vector unpack high/low with sign extension.  Widens one half
 * of the "packee" elements of b into "unpacked" elements (the sign
 * extension happens in the signed element-to-element assignment).  A
 * temporary is used so r may alias b.
 */
#define VUPK(suffix, unpacked, packee, hi)                              \
    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
    {                                                                   \
        int i;                                                          \
        ppc_avr_t result;                                               \
                                                                        \
        if (hi) {                                                       \
            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
                result.unpacked[i] = b->packee[i];                      \
            }                                                           \
        } else {                                                        \
            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
                 i++) {                                                 \
                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
VUPK(hsb, s16, s8, UPKHI)
VUPK(hsh, s32, s16, UPKHI)
VUPK(hsw, s64, s32, UPKHI)
VUPK(lsb, s16, s8, UPKLO)
VUPK(lsh, s32, s16, UPKLO)
VUPK(lsw, s64, s32, UPKLO)
#undef VUPK
#undef UPKHI
#undef UPKLO
2293
/*
 * VGENERIC_DO: apply a per-element unary operation "name" to every
 * element of b, writing r.  Used below for count-leading-zeros,
 * count-trailing-zeros and population count at every element width.
 */
#define VGENERIC_DO(name, element)                                      \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, element) {                              \
            r->element[i] = name(b->element[i]);                        \
        }                                                               \
    }

/* clz on sub-word elements: shift into the top of a u32 so clz32 counts
 * only the element's own bits; a zero element yields the full width. */
#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
#define clzw(v) clz32((v))
#define clzd(v) clz64((v))

VGENERIC_DO(clzb, u8)
VGENERIC_DO(clzh, u16)
VGENERIC_DO(clzw, u32)
VGENERIC_DO(clzd, u64)

#undef clzb
#undef clzh
#undef clzw
#undef clzd

/* ctz on sub-word elements: a zero element yields the element width. */
#define ctzb(v) ((v) ? ctz32(v) : 8)
#define ctzh(v) ((v) ? ctz32(v) : 16)
#define ctzw(v) ctz32((v))
#define ctzd(v) ctz64((v))

VGENERIC_DO(ctzb, u8)
VGENERIC_DO(ctzh, u16)
VGENERIC_DO(ctzw, u32)
VGENERIC_DO(ctzd, u64)

#undef ctzb
#undef ctzh
#undef ctzw
#undef ctzd

#define popcntb(v) ctpop8(v)
#define popcnth(v) ctpop16(v)
#define popcntw(v) ctpop32(v)
#define popcntd(v) ctpop64(v)

VGENERIC_DO(popcntb, u8)
VGENERIC_DO(popcnth, u16)
VGENERIC_DO(popcntw, u32)
VGENERIC_DO(popcntd, u64)

#undef popcntb
#undef popcnth
#undef popcntw
#undef popcntd

#undef VGENERIC_DO
2350
/* 128-bit constant 1 as a ppc_avr_t initializer, in host word order. */
#if defined(HOST_WORDS_BIGENDIAN)
#define QW_ONE { .u64 = { 0, 1 } }
#else
#define QW_ONE { .u64 = { 1, 0 } }
#endif
2356
2357#ifndef CONFIG_INT128
2358
/* 128-bit bitwise NOT: *t = ~a.  Word order is irrelevant here, so
 * both halves are complemented by direct index. */
static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
{
    t->u64[0] = ~a.u64[0];
    t->u64[1] = ~a.u64[1];
}
2364
/* Unsigned 128-bit compare: returns -1, 0 or 1 for a <, ==, > b,
 * comparing the high halves first. */
static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
{
    if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
        return -1;
    } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
        return 1;
    } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
        return -1;
    } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
        return 1;
    } else {
        return 0;
    }
}
2379
/* 128-bit add: *t = a + b.  (~a.lo < b.lo) is 1 exactly when the
 * low-half addition carries out. */
static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
{
    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
}
2386
2387static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2388{
2389    ppc_avr_t not_a;
2390    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2391    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2392                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2393    avr_qw_not(&not_a, a);
2394    return avr_qw_cmpu(not_a, b) < 0;
2395}
2396
2397#endif
2398
/* vadduqm: 128-bit modulo add, using native __int128 when available. */
void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128;
#else
    avr_qw_add(r, *a, *b);
#endif
}
2407
/* vaddeuqm: 128-bit extended (carry-in) modulo add; the carry-in is
 * bit 0 of c. */
void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
#else

    if (c->u64[LO_IDX] & 1) {
        ppc_avr_t tmp;

        /* Fold the carry-in into a first, then add b. */
        tmp.u64[HI_IDX] = 0;
        tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
        avr_qw_add(&tmp, *a, tmp);
        avr_qw_add(r, tmp, *b);
    } else {
        avr_qw_add(r, *a, *b);
    }
#endif
}
2426
/* vaddcuq: carry-out of the 128-bit add a + b (1 iff b > ~a),
 * returned in the low bit of r. */
void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = (~a->u128 < b->u128);
#else
    ppc_avr_t not_a;

    avr_qw_not(&not_a, *a);

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
#endif
}
2440
/* vaddecuq: carry-out of the extended add a + b + (c & 1). */
void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    int carry_out = (~a->u128 < b->u128);
    /* The carry-in only produces a carry-out when a + b is all-ones
     * (but not when both operands are zero). */
    if (!carry_out && (c->u128 & 1)) {
        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
                    ((a->u128 != 0) || (b->u128 != 0));
    }
    r->u128 = carry_out;
#else

    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = 0;
    ppc_avr_t tmp;

    carry_out = avr_qw_addc(&tmp, *a, *b);

    /* If a + b did not carry, adding the carry-in may still wrap. */
    if (!carry_out && carry_in) {
        ppc_avr_t one = QW_ONE;
        carry_out = avr_qw_addc(&tmp, tmp, one);
    }
    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}
2466
/* vsubuqm: 128-bit subtract modulo 2^128, r = a - b. */
void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 - b->u128;
#else
    ppc_avr_t tmp;
    ppc_avr_t one = QW_ONE;

    /* Two's complement: a - b == a + ~b + 1. */
    avr_qw_not(&tmp, *b);
    avr_qw_add(&tmp, *a, tmp);
    avr_qw_add(r, tmp, one);
#endif
}
2480
/* vsubeuqm: extended subtract, r = a + ~b + (c & 1). */
void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
#else
    ppc_avr_t tmp, sum;

    avr_qw_not(&tmp, *b);
    avr_qw_add(&sum, *a, tmp);

    /* Add the carry-in from the low bit of c. */
    tmp.u64[HI_IDX] = 0;
    tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
    avr_qw_add(r, sum, tmp);
#endif
}
2496
2497void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2498{
2499#ifdef CONFIG_INT128
2500    r->u128 = (~a->u128 < ~b->u128) ||
2501                 (a->u128 + ~b->u128 == (__uint128_t)-1);
2502#else
2503    int carry = (avr_qw_cmpu(*a, *b) > 0);
2504    if (!carry) {
2505        ppc_avr_t tmp;
2506        avr_qw_not(&tmp, *b);
2507        avr_qw_add(&tmp, *a, tmp);
2508        carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2509    }
2510    r->u64[HI_IDX] = 0;
2511    r->u64[LO_IDX] = carry;
2512#endif
2513}
2514
/* vsubecuq: carry-out of the extended subtraction a + ~b + (c & 1). */
void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
#ifdef CONFIG_INT128
    r->u128 =
        (~a->u128 < ~b->u128) ||
        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
#else
    int carry_in = c->u64[LO_IDX] & 1;
    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
    /* With a carry-in, a + ~b + 1 also carries when a + ~b is all-ones. */
    if (!carry_out && carry_in) {
        ppc_avr_t tmp;
        avr_qw_not(&tmp, *b);
        avr_qw_add(&tmp, *a, tmp);
        carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
    }

    r->u64[HI_IDX] = 0;
    r->u64[LO_IDX] = carry_out;
#endif
}
2535
2536#define BCD_PLUS_PREF_1 0xC
2537#define BCD_PLUS_PREF_2 0xF
2538#define BCD_PLUS_ALT_1  0xA
2539#define BCD_NEG_PREF    0xD
2540#define BCD_NEG_ALT     0xB
2541#define BCD_PLUS_ALT_2  0xE
2542#define NATIONAL_PLUS   0x2B
2543#define NATIONAL_NEG    0x2D
2544
2545#if defined(HOST_WORDS_BIGENDIAN)
2546#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2547#else
2548#define BCD_DIG_BYTE(n) ((n) / 2)
2549#endif
2550
2551static int bcd_get_sgn(ppc_avr_t *bcd)
2552{
2553    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2554    case BCD_PLUS_PREF_1:
2555    case BCD_PLUS_PREF_2:
2556    case BCD_PLUS_ALT_1:
2557    case BCD_PLUS_ALT_2:
2558    {
2559        return 1;
2560    }
2561
2562    case BCD_NEG_PREF:
2563    case BCD_NEG_ALT:
2564    {
2565        return -1;
2566    }
2567
2568    default:
2569    {
2570        return 0;
2571    }
2572    }
2573}
2574
2575static int bcd_preferred_sgn(int sgn, int ps)
2576{
2577    if (sgn >= 0) {
2578        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2579    } else {
2580        return BCD_NEG_PREF;
2581    }
2582}
2583
2584static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2585{
2586    uint8_t result;
2587    if (n & 1) {
2588        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2589    } else {
2590       result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2591    }
2592
2593    if (unlikely(result > 9)) {
2594        *invalid = true;
2595    }
2596    return result;
2597}
2598
2599static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2600{
2601    if (n & 1) {
2602        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2603        bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2604    } else {
2605        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2606        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2607    }
2608}
2609
2610static bool bcd_is_valid(ppc_avr_t *bcd)
2611{
2612    int i;
2613    int invalid = 0;
2614
2615    if (bcd_get_sgn(bcd) == 0) {
2616        return false;
2617    }
2618
2619    for (i = 1; i < 32; i++) {
2620        bcd_get_digit(bcd, i, &invalid);
2621        if (unlikely(invalid)) {
2622            return false;
2623        }
2624    }
2625    return true;
2626}
2627
2628static int bcd_cmp_zero(ppc_avr_t *bcd)
2629{
2630    if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) {
2631        return CRF_EQ;
2632    } else {
2633        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2634    }
2635}
2636
2637static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2638{
2639#if defined(HOST_WORDS_BIGENDIAN)
2640    return reg->u16[7 - n];
2641#else
2642    return reg->u16[n];
2643#endif
2644}
2645
2646static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2647{
2648#if defined(HOST_WORDS_BIGENDIAN)
2649    reg->u16[7 - n] = val;
2650#else
2651    reg->u16[n] = val;
2652#endif
2653}
2654
2655static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2656{
2657    int i;
2658    int invalid = 0;
2659    for (i = 31; i > 0; i--) {
2660        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2661        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2662        if (unlikely(invalid)) {
2663            return 0; /* doesn't matter */
2664        } else if (dig_a > dig_b) {
2665            return 1;
2666        } else if (dig_a < dig_b) {
2667            return -1;
2668        }
2669    }
2670
2671    return 0;
2672}
2673
2674static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2675                       int *overflow)
2676{
2677    int carry = 0;
2678    int i;
2679    for (i = 1; i <= 31; i++) {
2680        uint8_t digit = bcd_get_digit(a, i, invalid) +
2681                        bcd_get_digit(b, i, invalid) + carry;
2682        if (digit > 9) {
2683            carry = 1;
2684            digit -= 10;
2685        } else {
2686            carry = 0;
2687        }
2688
2689        bcd_put_digit(t, digit, i);
2690    }
2691
2692    *overflow = carry;
2693}
2694
2695static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2696                       int *overflow)
2697{
2698    int carry = 0;
2699    int i;
2700
2701    for (i = 1; i <= 31; i++) {
2702        uint8_t digit = bcd_get_digit(a, i, invalid) -
2703                        bcd_get_digit(b, i, invalid) + carry;
2704        if (digit & 0x80) {
2705            carry = -1;
2706            digit += 10;
2707        } else {
2708            carry = 0;
2709        }
2710
2711        bcd_put_digit(t, digit, i);
2712    }
2713
2714    *overflow = carry;
2715}
2716
/*
 * bcdadd: add two signed packed-decimal values.  ps selects the
 * preferred positive sign code.  Returns a CR field: LT/GT/EQ for the
 * result's sign, SO for invalid operands (OR'ed in on decimal overflow).
 */
uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{

    int sgna = bcd_get_sgn(a);
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgna == 0) || (sgnb == 0);
    int overflow = 0;
    uint32_t cr = 0;
    ppc_avr_t result = { .u64 = { 0, 0 } };

    if (!invalid) {
        if (sgna == sgnb) {
            /* Same sign: add magnitudes and keep the common sign. */
            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
            bcd_add_mag(&result, a, b, &invalid, &overflow);
            cr = bcd_cmp_zero(&result);
        } else {
            /* Opposite signs: subtract the smaller magnitude from the
             * larger; the result takes the sign of the larger operand. */
            int magnitude = bcd_cmp_mag(a, b);
            if (magnitude > 0) {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
                bcd_sub_mag(&result, a, b, &invalid, &overflow);
                cr = (sgna > 0) ? CRF_GT : CRF_LT;
            } else if (magnitude < 0) {
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
                bcd_sub_mag(&result, b, a, &invalid, &overflow);
                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
            } else {
                /* Equal magnitudes cancel to a (positive) zero. */
                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps);
                cr = CRF_EQ;
            }
        }
    }

    if (unlikely(invalid)) {
        result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
        cr = CRF_SO;
    } else if (overflow) {
        cr |= CRF_SO;
    }

    *r = result;

    return cr;
}
2760
/* bcdsub: subtract by flipping b's sign and deferring to bcdadd. */
uint32_t helper_bcdsub(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    ppc_avr_t bcopy = *b;
    int sgnb = bcd_get_sgn(b);
    if (sgnb < 0) {
        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
    } else if (sgnb > 0) {
        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
    }
    /* else invalid ... defer to bcdadd code for proper handling */

    return helper_bcdadd(r, a, &bcopy, ps);
}
2774
/*
 * bcdcfn: convert national decimal (one halfword per digit, plus a
 * leading '+'/'-' sign halfword) to signed packed BCD.
 */
uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint16_t national = 0;
    uint16_t sgnb = get_national_digit(b, 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);

    for (i = 1; i < 8; i++) {
        national = get_national_digit(b, i);
        /* Only '0' (0x30) through '9' (0x39) are valid digits. */
        if (unlikely(national < 0x30 || national > 0x39)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, national & 0xf, i);
    }

    if (sgnb == NATIONAL_PLUS) {
        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
    } else {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
2810
/* bcdctn: convert signed packed BCD to national decimal format. */
uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    /* Only 7 digits fit in national format; higher nibbles overflow. */
    int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0);

    for (i = 1; i < 8; i++) {
        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);

        if (unlikely(invalid)) {
            break;
        }
    }
    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    /* Invalid input overrides everything else. */
    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
2844
/*
 * bcdcfz: convert zoned decimal (one byte per digit, zone in the high
 * nibble) to signed packed BCD.  ps selects the expected zone value
 * and the sign encoding.
 */
uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    int invalid = 0;
    int zone_digit = 0;
    int zone_lead = ps ? 0xF : 0x3;
    int digit = 0;
    ppc_avr_t ret = { .u64 = { 0, 0 } };
    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;

    /* With ps set, the sign nibble must be in the 0xA..0xF range. */
    if (unlikely((sgnb < 0xA) && ps)) {
        invalid = 1;
    }

    for (i = 0; i < 16; i++) {
        /* Byte 0 carries the sign in its zone; treat it as matching. */
        zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
            invalid = 1;
            break;
        }

        bcd_put_digit(&ret, digit, i + 1);
    }

    /* Decode the sign nibble for both encodings selected by ps. */
    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
            (!ps && (sgnb & 0x4))) {
        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
    } else {
        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
    }

    cr = bcd_cmp_zero(&ret);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
2888
/* bcdctz: convert signed packed BCD to zoned decimal format. */
uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint8_t digit = 0;
    int sgnb = bcd_get_sgn(b);
    int zone_lead = (ps) ? 0xF0 : 0x30;
    int invalid = (sgnb == 0);
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    /* Only 16 digits fit in zoned format; anything above overflows. */
    int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);

    for (i = 0; i < 16; i++) {
        digit = bcd_get_digit(b, i + 1, &invalid);

        if (unlikely(invalid)) {
            break;
        }

        ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
    }

    /* Encode the sign into the zone of the least significant byte. */
    if (ps) {
        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
    } else {
        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
    }

    cr = bcd_cmp_zero(b);

    if (ox_flag) {
        cr |= CRF_SO;
    }

    /* Invalid input overrides everything else. */
    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    *r = ret;

    return cr;
}
2931
/* bcdcfsq: convert a signed 128-bit integer to signed packed BCD. */
uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int cr = 0;
    uint64_t lo_value;
    uint64_t hi_value;
    ppc_avr_t ret = { .u64 = { 0, 0 } };

    if (b->s64[HI_IDX] < 0) {
        /* Negate the 128-bit value and record a minus sign. */
        lo_value = -b->s64[LO_IDX];
        hi_value = ~b->u64[HI_IDX] + !lo_value;
        bcd_put_digit(&ret, 0xD, 0);
    } else {
        lo_value = b->u64[LO_IDX];
        hi_value = b->u64[HI_IDX];
        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
    }

    /* Split the magnitude around 10^15; overflow if it needs more than
     * 31 decimal digits. */
    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
            lo_value > 9999999999999999ULL) {
        cr = CRF_SO;
    }

    /* Extract 15 low digits then 16 high digits. */
    for (i = 1; i < 16; hi_value /= 10, i++) {
        bcd_put_digit(&ret, hi_value % 10, i);
    }

    for (; i < 32; lo_value /= 10, i++) {
        bcd_put_digit(&ret, lo_value % 10, i);
    }

    cr |= bcd_cmp_zero(&ret);

    *r = ret;

    return cr;
}
2969
/* bcdctsq: convert signed packed BCD to a signed 128-bit integer. */
uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    uint8_t i;
    int cr;
    uint64_t carry;
    uint64_t unused;
    uint64_t lo_value;
    uint64_t hi_value = 0;
    int sgnb = bcd_get_sgn(b);
    int invalid = (sgnb == 0);

    /* Horner evaluation from the most significant digit down. */
    lo_value = bcd_get_digit(b, 31, &invalid);
    for (i = 30; i > 0; i--) {
        mulu64(&lo_value, &carry, lo_value, 10ULL);
        mulu64(&hi_value, &unused, hi_value, 10ULL);
        lo_value += bcd_get_digit(b, i, &invalid);
        hi_value += carry;

        if (unlikely(invalid)) {
            break;
        }
    }

    if (sgnb == -1) {
        /* Two's-complement negate the 128-bit magnitude. */
        r->s64[LO_IDX] = -lo_value;
        r->s64[HI_IDX] = ~hi_value + !r->s64[LO_IDX];
    } else {
        r->s64[LO_IDX] = lo_value;
        r->s64[HI_IDX] = hi_value;
    }

    cr = bcd_cmp_zero(b);

    if (unlikely(invalid)) {
        cr = CRF_SO;
    }

    return cr;
}
3009
/* bcdcpsgn: copy a's digits with b's sign code into r. */
uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    int invalid = 0;

    /* Both operands must carry a recognizable sign. */
    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
        return CRF_SO;
    }

    *r = *a;
    bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0);

    /* Validate every digit of both inputs. */
    for (i = 1; i < 32; i++) {
        bcd_get_digit(a, i, &invalid);
        bcd_get_digit(b, i, &invalid);
        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    return bcd_cmp_zero(r);
}
3032
/* bcdsetsgn: rewrite b's sign nibble with the preferred sign code. */
uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
{
    int sgnb = bcd_get_sgn(b);

    *r = *b;
    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    return bcd_cmp_zero(r);
}
3046
/*
 * bcds: decimal shift of b by the signed digit count taken from a
 * (positive = left, negative = right).  Overflow (SO) on digits
 * shifted out to the left.
 */
uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    /* The shift count is the byte just above the low doubleword. */
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
#else
    int i = a->s8[8];
#endif
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    /* Clear the sign nibble so it does not shift with the digits. */
    ret.u64[LO_IDX] &= ~0xf;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    /* Clamp to the 31-digit width. */
    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    /* Each decimal digit is one nibble, hence the * 4. */
    if (i > 0) {
        ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
    } else {
        urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}
3086
/*
 * bcdus: unsigned decimal shift — all 32 nibbles are digits (no sign),
 * shifted by the signed count from a.  SO on left-shift overflow.
 */
uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int i;
    int invalid = 0;
    bool ox_flag = false;
    ppc_avr_t ret = *b;

    /* Unsigned form: every nibble, including position 0, is a digit. */
    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

    /* The shift count is the byte just above the low doubleword. */
#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s8[7];
#else
    i = a->s8[8];
#endif
    if (i >= 32) {
        /* Everything shifted out to the left: overflow. */
        ox_flag = true;
        ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0;
    } else if (i <= -32) {
        /* Everything shifted out to the right: zero, no overflow. */
        ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0;
    } else if (i > 0) {
        ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
    } else {
        urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
    }
    *r = ret;

    cr = bcd_cmp_zero(r);
    if (ox_flag) {
        cr |= CRF_SO;
    }

    return cr;
}
3127
/*
 * bcdsr: decimal shift and round.  Right shifts round the result by
 * adding one when the last digit shifted out is >= 5.
 */
uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int cr;
    int unused = 0;
    int invalid = 0;
    bool ox_flag = false;
    int sgnb = bcd_get_sgn(b);
    ppc_avr_t ret = *b;
    /* Clear the sign nibble so it does not shift with the digits. */
    ret.u64[LO_IDX] &= ~0xf;

    /* Shift count byte and the BCD constant 1 (digit position 1). */
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s8[7];
    ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } };
#else
    int i = a->s8[8];
    ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } };
#endif

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    /* Clamp to the 31-digit width. */
    if (unlikely(i > 31)) {
        i = 31;
    } else if (unlikely(i < -31)) {
        i = -31;
    }

    if (i > 0) {
        ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
    } else {
        urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);

        /* Round up if the digit now sitting in the sign slot is >= 5. */
        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
        }
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);

    cr = bcd_cmp_zero(&ret);
    if (ox_flag) {
        cr |= CRF_SO;
    }
    *r = ret;

    return cr;
}
3175
/*
 * bcdtrunc: truncate the signed BCD value b to the number of digits
 * given by a halfword of a; SO if any non-zero digit was discarded.
 */
uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    uint64_t mask;
    uint32_t ox_flag = 0;
    /* +1 accounts for the sign nibble occupying position 0. */
#if defined(HOST_WORDS_BIGENDIAN)
    int i = a->s16[3] + 1;
#else
    int i = a->s16[4] + 1;
#endif
    ppc_avr_t ret = *b;

    if (bcd_is_valid(b) == false) {
        return CRF_SO;
    }

    if (i > 16 && i < 32) {
        /* Keep i nibbles: mask off the upper part of the high word. */
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.u64[HI_IDX] & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.u64[HI_IDX] &= mask;
    } else if (i >= 0 && i <= 16) {
        /* Everything kept fits in the low word. */
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.u64[LO_IDX] &= mask;
        ret.u64[HI_IDX] = 0;
    }
    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
    *r = ret;

    return bcd_cmp_zero(&ret) | ox_flag;
}
3212
/*
 * bcdutrunc: truncate the unsigned BCD value b (all 32 nibbles are
 * digits) to the digit count from a; SO if non-zero digits were lost.
 */
uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
{
    int i;
    uint64_t mask;
    uint32_t ox_flag = 0;
    int invalid = 0;
    ppc_avr_t ret = *b;

    /* Unsigned form: validate every nibble, including position 0. */
    for (i = 0; i < 32; i++) {
        bcd_get_digit(b, i, &invalid);

        if (unlikely(invalid)) {
            return CRF_SO;
        }
    }

#if defined(HOST_WORDS_BIGENDIAN)
    i = a->s16[3];
#else
    i = a->s16[4];
#endif
    if (i > 16 && i < 33) {
        /* Keep i digits: mask off the upper part of the high word. */
        mask = (uint64_t)-1 >> (128 - i * 4);
        if (ret.u64[HI_IDX] & ~mask) {
            ox_flag = CRF_SO;
        }

        ret.u64[HI_IDX] &= mask;
    } else if (i > 0 && i <= 16) {
        /* Everything kept fits in the low word. */
        mask = (uint64_t)-1 >> (64 - i * 4);
        if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) {
            ox_flag = CRF_SO;
        }

        ret.u64[LO_IDX] &= mask;
        ret.u64[HI_IDX] = 0;
    } else if (i == 0) {
        /* Zero digits kept: result is zero. */
        if (ret.u64[HI_IDX] || ret.u64[LO_IDX]) {
            ox_flag = CRF_SO;
        }
        ret.u64[HI_IDX] = ret.u64[LO_IDX] = 0;
    }

    *r = ret;
    if (r->u64[HI_IDX] == 0 && r->u64[LO_IDX] == 0) {
        return ox_flag | CRF_EQ;
    }

    return ox_flag | CRF_GT;
}
3263
/* vsbox: apply the AES SubBytes S-box to every byte of a. */
void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
{
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = AES_sbox[a->u8[i]];
    }
}
3271
/*
 * vcipher: one full AES encryption round on state a (SubBytes,
 * ShiftRows and MixColumns via the combined Te tables), then
 * AddRoundKey with b.
 */
void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        result.AVRW(i) = b->AVRW(i) ^
            (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
             AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
             AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
             AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
    }
    *r = result;
}
3286
/* vcipherlast: final AES round — SubBytes + ShiftRows + AddRoundKey,
 * with no MixColumns step. */
void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
    }
    *r = result;
}
3297
/*
 * vncipher: one inverse AES round — InvShiftRows + InvSubBytes,
 * AddRoundKey with b, then InvMixColumns via the imc tables.
 */
void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    /* This differs from what is written in ISA V2.07.  The RTL is */
    /* incorrect and will be fixed in V2.07B.                      */
    int i;
    ppc_avr_t tmp;

    VECTOR_FOR_INORDER_I(i, u8) {
        tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->AVRW(i) =
            AES_imc[tmp.AVRB(4*i + 0)][0] ^
            AES_imc[tmp.AVRB(4*i + 1)][1] ^
            AES_imc[tmp.AVRB(4*i + 2)][2] ^
            AES_imc[tmp.AVRB(4*i + 3)][3];
    }
}
3317
/* vncipherlast: final inverse AES round — InvShiftRows + InvSubBytes
 * + AddRoundKey, with no InvMixColumns step. */
void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
    }
    *r = result;
}
3328
/* Rotate a 32-bit value right by n bits; n must be in 1..31.
 * Note: (n) is fully parenthesized so compound arguments expand
 * correctly (the old form expanded 32-n, mis-grouping e.g. n == a+b). */
#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (3 - (i))
#endif
3335
/*
 * vshasigmaw: per-word SHA-256 sigma functions.  st selects the
 * upper-case Sigma (st=1) vs lower-case sigma (st=0) family; each bit
 * of six selects the 0 or 1 variant for the corresponding word.
 */
void helper_vshasigmaw(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u32) {
        if (st == 0) {
            if ((six & (0x8 >> i)) == 0) {
                /* sigma0: ROTR 7 ^ ROTR 18 ^ SHR 3 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 18) ^
                                    (a->u32[EL_IDX(i)] >> 3);
            } else { /* six.bit[i] == 1 */
                /* sigma1: ROTR 17 ^ ROTR 19 ^ SHR 10 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 19) ^
                                    (a->u32[EL_IDX(i)] >> 10);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> i)) == 0) {
                /* Sigma0: ROTR 2 ^ ROTR 13 ^ ROTR 22 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 13) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 22);
            } else { /* six.bit[i] == 1 */
                /* Sigma1: ROTR 6 ^ ROTR 11 ^ ROTR 25 */
                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 11) ^
                                    ROTRu32(a->u32[EL_IDX(i)], 25);
            }
        }
    }
}
3366
3367#undef ROTRu32
3368#undef EL_IDX
3369
/* Rotate a 64-bit value right by n bits; n must be in 1..63.
 * Note: (n) is fully parenthesized so compound arguments expand
 * correctly (the old form expanded 64-n, mis-grouping e.g. n == a+b). */
#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
#if defined(HOST_WORDS_BIGENDIAN)
#define EL_IDX(i) (i)
#else
#define EL_IDX(i) (1 - (i))
#endif
3376
/*
 * vshasigmad: per-doubleword SHA-512 sigma functions.  st selects the
 * upper-case Sigma (st=1) vs lower-case sigma (st=0) family; bit 2*i
 * of six selects the 0 or 1 variant for doubleword i.
 */
void helper_vshasigmad(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
{
    int st = (st_six & 0x10) != 0;
    int six = st_six & 0xF;
    int i;

    VECTOR_FOR_INORDER_I(i, u64) {
        if (st == 0) {
            if ((six & (0x8 >> (2*i))) == 0) {
                /* sigma0: ROTR 1 ^ ROTR 8 ^ SHR 7 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 8) ^
                                    (a->u64[EL_IDX(i)] >> 7);
            } else { /* six.bit[2*i] == 1 */
                /* sigma1: ROTR 19 ^ ROTR 61 ^ SHR 6 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 61) ^
                                    (a->u64[EL_IDX(i)] >> 6);
            }
        } else { /* st == 1 */
            if ((six & (0x8 >> (2*i))) == 0) {
                /* Sigma0: ROTR 28 ^ ROTR 34 ^ ROTR 39 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 34) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 39);
            } else { /* six.bit[2*i] == 1 */
                /* Sigma1: ROTR 14 ^ ROTR 18 ^ ROTR 41 */
                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 18) ^
                                    ROTRu64(a->u64[EL_IDX(i)], 41);
            }
        }
    }
}
3407
3408#undef ROTRu64
3409#undef EL_IDX
3410
/*
 * vpermxor: for each result byte, the two nibbles of the control byte
 * in c select one byte from a and one from b, which are XOR'ed.
 */
void helper_vpermxor(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int indexA = c->u8[i] >> 4;
        int indexB = c->u8[i] & 0xF;
#if defined(HOST_WORDS_BIGENDIAN)
        result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
#else
        /* Indices are big-endian element numbers; mirror them on LE hosts. */
        result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
#endif
    }
    *r = result;
}
3427
3428#undef VECTOR_FOR_INORDER_I
3429#undef HI_IDX
3430#undef LO_IDX
3431
3432/*****************************************************************************/
3433/* SPE extension helpers */
3434/* Use a table to make this quicker */
static const uint8_t hbrev[16] = {
    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
};

/* Reverse the bit order within one byte via the nibble table. */
static inline uint8_t byte_reverse(uint8_t val)
{
    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
}

/* Reverse the bit order of a full 32-bit word. */
static inline uint32_t word_reverse(uint32_t val)
{
    /*
     * byte_reverse() returns uint8_t, which promotes to signed int;
     * shifting a value with bit 7 set left by 24 would then overflow
     * int (undefined behavior), so widen to uint32_t before shifting.
     */
    return byte_reverse(val >> 24) |
        ((uint32_t)byte_reverse(val >> 16) << 8) |
        ((uint32_t)byte_reverse(val >> 8) << 16) |
        ((uint32_t)byte_reverse(val) << 24);
}
3450
3451#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
3452target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3453{
3454    uint32_t a, b, d, mask;
3455
3456    mask = UINT32_MAX >> (32 - MASKBITS);
3457    a = arg1 & mask;
3458    b = arg2 & mask;
3459    d = word_reverse(1 + word_reverse(a | ~b));
3460    return (arg1 & ~mask) | (d & b);
3461}
3462
/* Count leading sign bits of a 32-bit value. */
uint32_t helper_cntlsw32(uint32_t val)
{
    /* For negative values, count the leading ones by inverting first. */
    return clz32((val & 0x80000000) ? ~val : val);
}
3471
/* Count leading zeros of a 32-bit value. */
uint32_t helper_cntlzw32(uint32_t val)
{
    return clz32(val);
}
3476
3477/* 440 specific */
/*
 * dlmzb: determine the leftmost zero byte in the 8-byte string
 * high:low.  Writes the byte count into the low 7 bits of XER and
 * returns it; if update_Rc, CR0 records where (or whether) a zero
 * byte was found, with SO copied from XER.
 */
target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
                          target_ulong low, uint32_t update_Rc)
{
    target_ulong mask;
    int i;

    /* i is the 1-based position of the leftmost zero byte. */
    i = 1;
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((high & mask) == 0) {
            /* Zero byte found in the high word. */
            if (update_Rc) {
                env->crf[0] = 0x4;
            }
            goto done;
        }
        i++;
    }
    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
        if ((low & mask) == 0) {
            /* Zero byte found in the low word. */
            if (update_Rc) {
                env->crf[0] = 0x8;
            }
            goto done;
        }
        i++;
    }
    /* No zero byte in either word: report the full length. */
    i = 8;
    if (update_Rc) {
        env->crf[0] = 0x2;
    }
 done:
    /* Store the count in XER[57:63]. */
    env->xer = (env->xer & ~0x7F) | i;
    if (update_Rc) {
        env->crf[0] |= xer_so;
    }
    return i;
}
3514