qemu/target/ppc/int_helper.c
<<
>>
Prefs
   1/*
   2 *  PowerPC integer and vector emulation helpers for QEMU.
   3 *
   4 *  Copyright (c) 2003-2007 Jocelyn Mayer
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20#include "cpu.h"
  21#include "internal.h"
  22#include "exec/exec-all.h"
  23#include "qemu/host-utils.h"
  24#include "exec/helper-proto.h"
  25#include "crypto/aes.h"
  26#include "fpu/softfloat.h"
  27
  28#include "helper_regs.h"
  29/*****************************************************************************/
  30/* Fixed point operations helpers */
  31
  32static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
  33{
  34    if (unlikely(ov)) {
  35        env->so = env->ov = 1;
  36    } else {
  37        env->ov = 0;
  38    }
  39}
  40
  41target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
  42                           uint32_t oe)
  43{
  44    uint64_t rt = 0;
  45    int overflow = 0;
  46
  47    uint64_t dividend = (uint64_t)ra << 32;
  48    uint64_t divisor = (uint32_t)rb;
  49
  50    if (unlikely(divisor == 0)) {
  51        overflow = 1;
  52    } else {
  53        rt = dividend / divisor;
  54        overflow = rt > UINT32_MAX;
  55    }
  56
  57    if (unlikely(overflow)) {
  58        rt = 0; /* Undefined */
  59    }
  60
  61    if (oe) {
  62        helper_update_ov_legacy(env, overflow);
  63    }
  64
  65    return (target_ulong)rt;
  66}
  67
  68target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
  69                          uint32_t oe)
  70{
  71    int64_t rt = 0;
  72    int overflow = 0;
  73
  74    int64_t dividend = (int64_t)ra << 32;
  75    int64_t divisor = (int64_t)((int32_t)rb);
  76
  77    if (unlikely((divisor == 0) ||
  78                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
  79        overflow = 1;
  80    } else {
  81        rt = dividend / divisor;
  82        overflow = rt != (int32_t)rt;
  83    }
  84
  85    if (unlikely(overflow)) {
  86        rt = 0; /* Undefined */
  87    }
  88
  89    if (oe) {
  90        helper_update_ov_legacy(env, overflow);
  91    }
  92
  93    return (target_ulong)rt;
  94}
  95
  96#if defined(TARGET_PPC64)
  97
  98uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
  99{
 100    uint64_t rt = 0;
 101    int overflow = 0;
 102
 103    overflow = divu128(&rt, &ra, rb);
 104
 105    if (unlikely(overflow)) {
 106        rt = 0; /* Undefined */
 107    }
 108
 109    if (oe) {
 110        helper_update_ov_legacy(env, overflow);
 111    }
 112
 113    return rt;
 114}
 115
 116uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
 117{
 118    int64_t rt = 0;
 119    int64_t ra = (int64_t)rau;
 120    int64_t rb = (int64_t)rbu;
 121    int overflow = divs128(&rt, &ra, rb);
 122
 123    if (unlikely(overflow)) {
 124        rt = 0; /* Undefined */
 125    }
 126
 127    if (oe) {
 128        helper_update_ov_legacy(env, overflow);
 129    }
 130
 131    return rt;
 132}
 133
 134#endif
 135
 136
 137#if defined(TARGET_PPC64)
 138/* if x = 0xab, returns 0xababababababababa */
 139#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
 140
 141/* substract 1 from each byte, and with inverse, check if MSB is set at each
 142 * byte.
 143 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 144 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 145 */
 146#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
 147
 148/* When you XOR the pattern and there is a match, that byte will be zero */
 149#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))
 150
 151uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
 152{
 153    return hasvalue(rb, ra) ? CRF_GT : 0;
 154}
 155
 156#undef pattern
 157#undef haszero
 158#undef hasvalue
 159
 160/* Return invalid random number.
 161 *
 162 * FIXME: Add rng backend or other mechanism to get cryptographically suitable
 163 * random number
 164 */
 165target_ulong helper_darn32(void)
 166{
 167    return -1;
 168}
 169
 170target_ulong helper_darn64(void)
 171{
 172    return -1;
 173}
 174
 175#endif
 176
 177#if defined(TARGET_PPC64)
 178
 179uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
 180{
 181    int i;
 182    uint64_t ra = 0;
 183
 184    for (i = 0; i < 8; i++) {
 185        int index = (rs >> (i*8)) & 0xFF;
 186        if (index < 64) {
 187            if (rb & PPC_BIT(index)) {
 188                ra |= 1 << i;
 189            }
 190        }
 191    }
 192    return ra;
 193}
 194
 195#endif
 196
 197target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
 198{
 199    target_ulong mask = 0xff;
 200    target_ulong ra = 0;
 201    int i;
 202
 203    for (i = 0; i < sizeof(target_ulong); i++) {
 204        if ((rs & mask) == (rb & mask)) {
 205            ra |= mask;
 206        }
 207        mask <<= 8;
 208    }
 209    return ra;
 210}
 211
 212/* shift right arithmetic helper */
 213target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
 214                         target_ulong shift)
 215{
 216    int32_t ret;
 217
 218    if (likely(!(shift & 0x20))) {
 219        if (likely((uint32_t)shift != 0)) {
 220            shift &= 0x1f;
 221            ret = (int32_t)value >> shift;
 222            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
 223                env->ca32 = env->ca = 0;
 224            } else {
 225                env->ca32 = env->ca = 1;
 226            }
 227        } else {
 228            ret = (int32_t)value;
 229            env->ca32 = env->ca = 0;
 230        }
 231    } else {
 232        ret = (int32_t)value >> 31;
 233        env->ca32 = env->ca = (ret != 0);
 234    }
 235    return (target_long)ret;
 236}
 237
 238#if defined(TARGET_PPC64)
 239target_ulong helper_srad(CPUPPCState *env, target_ulong value,
 240                         target_ulong shift)
 241{
 242    int64_t ret;
 243
 244    if (likely(!(shift & 0x40))) {
 245        if (likely((uint64_t)shift != 0)) {
 246            shift &= 0x3f;
 247            ret = (int64_t)value >> shift;
 248            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
 249                env->ca32 = env->ca = 0;
 250            } else {
 251                env->ca32 = env->ca = 1;
 252            }
 253        } else {
 254            ret = (int64_t)value;
 255            env->ca32 = env->ca = 0;
 256        }
 257    } else {
 258        ret = (int64_t)value >> 63;
 259        env->ca32 = env->ca = (ret != 0);
 260    }
 261    return ret;
 262}
 263#endif
 264
 265#if defined(TARGET_PPC64)
 266target_ulong helper_popcntb(target_ulong val)
 267{
 268    /* Note that we don't fold past bytes */
 269    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
 270                                           0x5555555555555555ULL);
 271    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
 272                                           0x3333333333333333ULL);
 273    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
 274                                           0x0f0f0f0f0f0f0f0fULL);
 275    return val;
 276}
 277
 278target_ulong helper_popcntw(target_ulong val)
 279{
 280    /* Note that we don't fold past words.  */
 281    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
 282                                           0x5555555555555555ULL);
 283    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
 284                                           0x3333333333333333ULL);
 285    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
 286                                           0x0f0f0f0f0f0f0f0fULL);
 287    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
 288                                           0x00ff00ff00ff00ffULL);
 289    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
 290                                           0x0000ffff0000ffffULL);
 291    return val;
 292}
 293#else
 294target_ulong helper_popcntb(target_ulong val)
 295{
 296    /* Note that we don't fold past bytes */
 297    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
 298    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
 299    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
 300    return val;
 301}
 302#endif
 303
 304/*****************************************************************************/
 305/* PowerPC 601 specific instructions (POWER bridge) */
 306target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
 307{
 308    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
 309
 310    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
 311        (int32_t)arg2 == 0) {
 312        env->spr[SPR_MQ] = 0;
 313        return INT32_MIN;
 314    } else {
 315        env->spr[SPR_MQ] = tmp % arg2;
 316        return  tmp / (int32_t)arg2;
 317    }
 318}
 319
 320target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
 321                         target_ulong arg2)
 322{
 323    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];
 324
 325    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
 326        (int32_t)arg2 == 0) {
 327        env->so = env->ov = 1;
 328        env->spr[SPR_MQ] = 0;
 329        return INT32_MIN;
 330    } else {
 331        env->spr[SPR_MQ] = tmp % arg2;
 332        tmp /= (int32_t)arg2;
 333        if ((int32_t)tmp != tmp) {
 334            env->so = env->ov = 1;
 335        } else {
 336            env->ov = 0;
 337        }
 338        return tmp;
 339    }
 340}
 341
 342target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
 343                         target_ulong arg2)
 344{
 345    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
 346        (int32_t)arg2 == 0) {
 347        env->spr[SPR_MQ] = 0;
 348        return INT32_MIN;
 349    } else {
 350        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
 351        return (int32_t)arg1 / (int32_t)arg2;
 352    }
 353}
 354
 355target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
 356                          target_ulong arg2)
 357{
 358    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
 359        (int32_t)arg2 == 0) {
 360        env->so = env->ov = 1;
 361        env->spr[SPR_MQ] = 0;
 362        return INT32_MIN;
 363    } else {
 364        env->ov = 0;
 365        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
 366        return (int32_t)arg1 / (int32_t)arg2;
 367    }
 368}
 369
 370/*****************************************************************************/
 371/* 602 specific instructions */
 372/* mfrom is the most crazy instruction ever seen, imho ! */
 373/* Real implementation uses a ROM table. Do the same */
 374/* Extremely decomposed:
 375 *                      -arg / 256
 376 * return 256 * log10(10           + 1.0) + 0.5
 377 */
 378#if !defined(CONFIG_USER_ONLY)
 379target_ulong helper_602_mfrom(target_ulong arg)
 380{
 381    if (likely(arg < 602)) {
 382#include "mfrom_table.c"
 383        return mfrom_ROM_table[arg];
 384    } else {
 385        return 0;
 386    }
 387}
 388#endif
 389
 390/*****************************************************************************/
 391/* Altivec extension helpers */
 392#if defined(HOST_WORDS_BIGENDIAN)
 393#define HI_IDX 0
 394#define LO_IDX 1
 395#define AVRB(i) u8[i]
 396#define AVRW(i) u32[i]
 397#else
 398#define HI_IDX 1
 399#define LO_IDX 0
 400#define AVRB(i) u8[15-(i)]
 401#define AVRW(i) u32[3-(i)]
 402#endif
 403
 404#if defined(HOST_WORDS_BIGENDIAN)
 405#define VECTOR_FOR_INORDER_I(index, element)                    \
 406    for (index = 0; index < ARRAY_SIZE(r->element); index++)
 407#else
 408#define VECTOR_FOR_INORDER_I(index, element)                    \
 409    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
 410#endif
 411
 412/* Saturating arithmetic helpers.  */
 413#define SATCVT(from, to, from_type, to_type, min, max)          \
 414    static inline to_type cvt##from##to(from_type x, int *sat)  \
 415    {                                                           \
 416        to_type r;                                              \
 417                                                                \
 418        if (x < (from_type)min) {                               \
 419            r = min;                                            \
 420            *sat = 1;                                           \
 421        } else if (x > (from_type)max) {                        \
 422            r = max;                                            \
 423            *sat = 1;                                           \
 424        } else {                                                \
 425            r = x;                                              \
 426        }                                                       \
 427        return r;                                               \
 428    }
 429#define SATCVTU(from, to, from_type, to_type, min, max)         \
 430    static inline to_type cvt##from##to(from_type x, int *sat)  \
 431    {                                                           \
 432        to_type r;                                              \
 433                                                                \
 434        if (x > (from_type)max) {                               \
 435            r = max;                                            \
 436            *sat = 1;                                           \
 437        } else {                                                \
 438            r = x;                                              \
 439        }                                                       \
 440        return r;                                               \
 441    }
 442SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
 443SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
 444SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
 445
 446SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
 447SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
 448SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
 449SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
 450SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
 451SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
 452#undef SATCVT
 453#undef SATCVTU
 454
 455void helper_lvsl(ppc_avr_t *r, target_ulong sh)
 456{
 457    int i, j = (sh & 0xf);
 458
 459    VECTOR_FOR_INORDER_I(i, u8) {
 460        r->u8[i] = j++;
 461    }
 462}
 463
 464void helper_lvsr(ppc_avr_t *r, target_ulong sh)
 465{
 466    int i, j = 0x10 - (sh & 0xf);
 467
 468    VECTOR_FOR_INORDER_I(i, u8) {
 469        r->u8[i] = j++;
 470    }
 471}
 472
 473void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
 474{
 475#if defined(HOST_WORDS_BIGENDIAN)
 476    env->vscr = r->u32[3];
 477#else
 478    env->vscr = r->u32[0];
 479#endif
 480    set_flush_to_zero(vscr_nj, &env->vec_status);
 481}
 482
 483void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 484{
 485    int i;
 486
 487    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
 488        r->u32[i] = ~a->u32[i] < b->u32[i];
 489    }
 490}
 491
 492/* vprtybw */
 493void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
 494{
 495    int i;
 496    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
 497        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
 498        res ^= res >> 8;
 499        r->u32[i] = res & 1;
 500    }
 501}
 502
 503/* vprtybd */
 504void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
 505{
 506    int i;
 507    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
 508        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
 509        res ^= res >> 16;
 510        res ^= res >> 8;
 511        r->u64[i] = res & 1;
 512    }
 513}
 514
 515/* vprtybq */
 516void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
 517{
 518    uint64_t res = b->u64[0] ^ b->u64[1];
 519    res ^= res >> 32;
 520    res ^= res >> 16;
 521    res ^= res >> 8;
 522    r->u64[LO_IDX] = res & 1;
 523    r->u64[HI_IDX] = 0;
 524}
 525
 526#define VARITH_DO(name, op, element)                                    \
 527    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
 528    {                                                                   \
 529        int i;                                                          \
 530                                                                        \
 531        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
 532            r->element[i] = a->element[i] op b->element[i];             \
 533        }                                                               \
 534    }
 535#define VARITH(suffix, element)                 \
 536    VARITH_DO(add##suffix, +, element)          \
 537    VARITH_DO(sub##suffix, -, element)
 538VARITH(ubm, u8)
 539VARITH(uhm, u16)
 540VARITH(uwm, u32)
 541VARITH(udm, u64)
 542VARITH_DO(muluwm, *, u32)
 543#undef VARITH_DO
 544#undef VARITH
 545
 546#define VARITHFP(suffix, func)                                          \
 547    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
 548                          ppc_avr_t *b)                                 \
 549    {                                                                   \
 550        int i;                                                          \
 551                                                                        \
 552        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
 553            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
 554        }                                                               \
 555    }
 556VARITHFP(addfp, float32_add)
 557VARITHFP(subfp, float32_sub)
 558VARITHFP(minfp, float32_min)
 559VARITHFP(maxfp, float32_max)
 560#undef VARITHFP
 561
 562#define VARITHFPFMA(suffix, type)                                       \
 563    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
 564                           ppc_avr_t *b, ppc_avr_t *c)                  \
 565    {                                                                   \
 566        int i;                                                          \
 567        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
 568            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
 569                                     type, &env->vec_status);           \
 570        }                                                               \
 571    }
 572VARITHFPFMA(maddfp, 0);
 573VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
 574#undef VARITHFPFMA
 575
 576#define VARITHSAT_CASE(type, op, cvt, element)                          \
 577    {                                                                   \
 578        type result = (type)a->element[i] op (type)b->element[i];       \
 579        r->element[i] = cvt(result, &sat);                              \
 580    }
 581
 582#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
 583    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
 584                        ppc_avr_t *b)                                   \
 585    {                                                                   \
 586        int sat = 0;                                                    \
 587        int i;                                                          \
 588                                                                        \
 589        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
 590            switch (sizeof(r->element[0])) {                            \
 591            case 1:                                                     \
 592                VARITHSAT_CASE(optype, op, cvt, element);               \
 593                break;                                                  \
 594            case 2:                                                     \
 595                VARITHSAT_CASE(optype, op, cvt, element);               \
 596                break;                                                  \
 597            case 4:                                                     \
 598                VARITHSAT_CASE(optype, op, cvt, element);               \
 599                break;                                                  \
 600            }                                                           \
 601        }                                                               \
 602        if (sat) {                                                      \
 603            env->vscr |= (1 << VSCR_SAT);                               \
 604        }                                                               \
 605    }
 606#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
 607    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
 608    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
 609#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
 610    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
 611    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
 612VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
 613VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
 614VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
 615VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
 616VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
 617VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
 618#undef VARITHSAT_CASE
 619#undef VARITHSAT_DO
 620#undef VARITHSAT_SIGNED
 621#undef VARITHSAT_UNSIGNED
 622
 623#define VAVG_DO(name, element, etype)                                   \
 624    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
 625    {                                                                   \
 626        int i;                                                          \
 627                                                                        \
 628        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
 629            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
 630            r->element[i] = x >> 1;                                     \
 631        }                                                               \
 632    }
 633
 634#define VAVG(type, signed_element, signed_type, unsigned_element,       \
 635             unsigned_type)                                             \
 636    VAVG_DO(avgs##type, signed_element, signed_type)                    \
 637    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
 638VAVG(b, s8, int16_t, u8, uint16_t)
 639VAVG(h, s16, int32_t, u16, uint32_t)
 640VAVG(w, s32, int64_t, u32, uint64_t)
 641#undef VAVG_DO
 642#undef VAVG
 643
 644#define VABSDU_DO(name, element)                                        \
 645void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
 646{                                                                       \
 647    int i;                                                              \
 648                                                                        \
 649    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
 650        r->element[i] = (a->element[i] > b->element[i]) ?               \
 651            (a->element[i] - b->element[i]) :                           \
 652            (b->element[i] - a->element[i]);                            \
 653    }                                                                   \
 654}
 655
 656/* VABSDU - Vector absolute difference unsigned
 657 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 658 *   element - element type to access from vector
 659 */
 660#define VABSDU(type, element)                   \
 661    VABSDU_DO(absdu##type, element)
 662VABSDU(b, u8)
 663VABSDU(h, u16)
 664VABSDU(w, u32)
 665#undef VABSDU_DO
 666#undef VABSDU
 667
 668#define VCF(suffix, cvt, element)                                       \
 669    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
 670                            ppc_avr_t *b, uint32_t uim)                 \
 671    {                                                                   \
 672        int i;                                                          \
 673                                                                        \
 674        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
 675            float32 t = cvt(b->element[i], &env->vec_status);           \
 676            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
 677        }                                                               \
 678    }
 679VCF(ux, uint32_to_float32, u32)
 680VCF(sx, int32_to_float32, s32)
 681#undef VCF
 682
 683#define VCMP_DO(suffix, compare, element, record)                       \
 684    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
 685                             ppc_avr_t *a, ppc_avr_t *b)                \
 686    {                                                                   \
 687        uint64_t ones = (uint64_t)-1;                                   \
 688        uint64_t all = ones;                                            \
 689        uint64_t none = 0;                                              \
 690        int i;                                                          \
 691                                                                        \
 692        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
 693            uint64_t result = (a->element[i] compare b->element[i] ?    \
 694                               ones : 0x0);                             \
 695            switch (sizeof(a->element[0])) {                            \
 696            case 8:                                                     \
 697                r->u64[i] = result;                                     \
 698                break;                                                  \
 699            case 4:                                                     \
 700                r->u32[i] = result;                                     \
 701                break;                                                  \
 702            case 2:                                                     \
 703                r->u16[i] = result;                                     \
 704                break;                                                  \
 705            case 1:                                                     \
 706                r->u8[i] = result;                                      \
 707                break;                                                  \
 708            }                                                           \
 709            all &= result;                                              \
 710            none |= result;                                             \
 711        }                                                               \
 712        if (record) {                                                   \
 713            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
 714        }                                                               \
 715    }
 716#define VCMP(suffix, compare, element)          \
 717    VCMP_DO(suffix, compare, element, 0)        \
 718    VCMP_DO(suffix##_dot, compare, element, 1)
 719VCMP(equb, ==, u8)
 720VCMP(equh, ==, u16)
 721VCMP(equw, ==, u32)
 722VCMP(equd, ==, u64)
 723VCMP(gtub, >, u8)
 724VCMP(gtuh, >, u16)
 725VCMP(gtuw, >, u32)
 726VCMP(gtud, >, u64)
 727VCMP(gtsb, >, s8)
 728VCMP(gtsh, >, s16)
 729VCMP(gtsw, >, s32)
 730VCMP(gtsd, >, s64)
 731#undef VCMP_DO
 732#undef VCMP
 733
 734#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
 735void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,              \
 736                            ppc_avr_t *a, ppc_avr_t *b)                 \
 737{                                                                       \
 738    etype ones = (etype)-1;                                             \
 739    etype all = ones;                                                   \
 740    etype result, none = 0;                                             \
 741    int i;                                                              \
 742                                                                        \
 743    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
 744        if (cmpzero) {                                                  \
 745            result = ((a->element[i] == 0)                              \
 746                           || (b->element[i] == 0)                      \
 747                           || (a->element[i] != b->element[i]) ?        \
 748                           ones : 0x0);                                 \
 749        } else {                                                        \
 750            result = (a->element[i] != b->element[i]) ? ones : 0x0;     \
 751        }                                                               \
 752        r->element[i] = result;                                         \
 753        all &= result;                                                  \
 754        none |= result;                                                 \
 755    }                                                                   \
 756    if (record) {                                                       \
 757        env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);           \
 758    }                                                                   \
 759}
 760
 761/* VCMPNEZ - Vector compare not equal to zero
 762 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 763 *   element - element type to access from vector
 764 */
 765#define VCMPNE(suffix, element, etype, cmpzero)         \
 766    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
 767    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
 768VCMPNE(zb, u8, uint8_t, 1)
 769VCMPNE(zh, u16, uint16_t, 1)
 770VCMPNE(zw, u32, uint32_t, 1)
 771VCMPNE(b, u8, uint8_t, 0)
 772VCMPNE(h, u16, uint16_t, 0)
 773VCMPNE(w, u32, uint32_t, 0)
 774#undef VCMPNE_DO
 775#undef VCMPNE
 776
 777#define VCMPFP_DO(suffix, compare, order, record)                       \
 778    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
 779                             ppc_avr_t *a, ppc_avr_t *b)                \
 780    {                                                                   \
 781        uint32_t ones = (uint32_t)-1;                                   \
 782        uint32_t all = ones;                                            \
 783        uint32_t none = 0;                                              \
 784        int i;                                                          \
 785                                                                        \
 786        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
 787            uint32_t result;                                            \
 788            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
 789                                            &env->vec_status);          \
 790            if (rel == float_relation_unordered) {                      \
 791                result = 0;                                             \
 792            } else if (rel compare order) {                             \
 793                result = ones;                                          \
 794            } else {                                                    \
 795                result = 0;                                             \
 796            }                                                           \
 797            r->u32[i] = result;                                         \
 798            all &= result;                                              \
 799            none |= result;                                             \
 800        }                                                               \
 801        if (record) {                                                   \
 802            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
 803        }                                                               \
 804    }
 805#define VCMPFP(suffix, compare, order)          \
 806    VCMPFP_DO(suffix, compare, order, 0)        \
 807    VCMPFP_DO(suffix##_dot, compare, order, 1)
 808VCMPFP(eqfp, ==, float_relation_equal)
 809VCMPFP(gefp, !=, float_relation_less)
 810VCMPFP(gtfp, ==, float_relation_greater)
 811#undef VCMPFP_DO
 812#undef VCMPFP
 813
 814static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
 815                                    ppc_avr_t *a, ppc_avr_t *b, int record)
 816{
 817    int i;
 818    int all_in = 0;
 819
 820    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
 821        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
 822        if (le_rel == float_relation_unordered) {
 823            r->u32[i] = 0xc0000000;
 824            all_in = 1;
 825        } else {
 826            float32 bneg = float32_chs(b->f[i]);
 827            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
 828            int le = le_rel != float_relation_greater;
 829            int ge = ge_rel != float_relation_less;
 830
 831            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
 832            all_in |= (!le | !ge);
 833        }
 834    }
 835    if (record) {
 836        env->crf[6] = (all_in == 0) << 1;
 837    }
 838}
 839
 840void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 841{
 842    vcmpbfp_internal(env, r, a, b, 0);
 843}
 844
 845void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 846                        ppc_avr_t *b)
 847{
 848    vcmpbfp_internal(env, r, a, b, 1);
 849}
 850
 851#define VCT(suffix, satcvt, element)                                    \
 852    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
 853                            ppc_avr_t *b, uint32_t uim)                 \
 854    {                                                                   \
 855        int i;                                                          \
 856        int sat = 0;                                                    \
 857        float_status s = env->vec_status;                               \
 858                                                                        \
 859        set_float_rounding_mode(float_round_to_zero, &s);               \
 860        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
 861            if (float32_is_any_nan(b->f[i])) {                          \
 862                r->element[i] = 0;                                      \
 863            } else {                                                    \
 864                float64 t = float32_to_float64(b->f[i], &s);            \
 865                int64_t j;                                              \
 866                                                                        \
 867                t = float64_scalbn(t, uim, &s);                         \
 868                j = float64_to_int64(t, &s);                            \
 869                r->element[i] = satcvt(j, &sat);                        \
 870            }                                                           \
 871        }                                                               \
 872        if (sat) {                                                      \
 873            env->vscr |= (1 << VSCR_SAT);                               \
 874        }                                                               \
 875    }
 876VCT(uxs, cvtsduw, u32)
 877VCT(sxs, cvtsdsw, s32)
 878#undef VCT
 879
 880target_ulong helper_vclzlsbb(ppc_avr_t *r)
 881{
 882    target_ulong count = 0;
 883    int i;
 884    VECTOR_FOR_INORDER_I(i, u8) {
 885        if (r->u8[i] & 0x01) {
 886            break;
 887        }
 888        count++;
 889    }
 890    return count;
 891}
 892
 893target_ulong helper_vctzlsbb(ppc_avr_t *r)
 894{
 895    target_ulong count = 0;
 896    int i;
 897#if defined(HOST_WORDS_BIGENDIAN)
 898    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
 899#else
 900    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
 901#endif
 902        if (r->u8[i] & 0x01) {
 903            break;
 904        }
 905        count++;
 906    }
 907    return count;
 908}
 909
 910void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 911                      ppc_avr_t *b, ppc_avr_t *c)
 912{
 913    int sat = 0;
 914    int i;
 915
 916    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
 917        int32_t prod = a->s16[i] * b->s16[i];
 918        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
 919
 920        r->s16[i] = cvtswsh(t, &sat);
 921    }
 922
 923    if (sat) {
 924        env->vscr |= (1 << VSCR_SAT);
 925    }
 926}
 927
 928void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 929                       ppc_avr_t *b, ppc_avr_t *c)
 930{
 931    int sat = 0;
 932    int i;
 933
 934    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
 935        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
 936        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
 937        r->s16[i] = cvtswsh(t, &sat);
 938    }
 939
 940    if (sat) {
 941        env->vscr |= (1 << VSCR_SAT);
 942    }
 943}
 944
 945#define VMINMAX_DO(name, compare, element)                              \
 946    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
 947    {                                                                   \
 948        int i;                                                          \
 949                                                                        \
 950        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
 951            if (a->element[i] compare b->element[i]) {                  \
 952                r->element[i] = b->element[i];                          \
 953            } else {                                                    \
 954                r->element[i] = a->element[i];                          \
 955            }                                                           \
 956        }                                                               \
 957    }
 958#define VMINMAX(suffix, element)                \
 959    VMINMAX_DO(min##suffix, >, element)         \
 960    VMINMAX_DO(max##suffix, <, element)
 961VMINMAX(sb, s8)
 962VMINMAX(sh, s16)
 963VMINMAX(sw, s32)
 964VMINMAX(sd, s64)
 965VMINMAX(ub, u8)
 966VMINMAX(uh, u16)
 967VMINMAX(uw, u32)
 968VMINMAX(ud, u64)
 969#undef VMINMAX_DO
 970#undef VMINMAX
 971
 972void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
 973{
 974    int i;
 975
 976    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
 977        int32_t prod = a->s16[i] * b->s16[i];
 978        r->s16[i] = (int16_t) (prod + c->s16[i]);
 979    }
 980}
 981
 982#define VMRG_DO(name, element, highp)                                   \
 983    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
 984    {                                                                   \
 985        ppc_avr_t result;                                               \
 986        int i;                                                          \
 987        size_t n_elems = ARRAY_SIZE(r->element);                        \
 988                                                                        \
 989        for (i = 0; i < n_elems / 2; i++) {                             \
 990            if (highp) {                                                \
 991                result.element[i*2+HI_IDX] = a->element[i];             \
 992                result.element[i*2+LO_IDX] = b->element[i];             \
 993            } else {                                                    \
 994                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
 995                    b->element[n_elems - i - 1];                        \
 996                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
 997                    a->element[n_elems - i - 1];                        \
 998            }                                                           \
 999        }                                                               \
1000        *r = result;                                                    \
1001    }
1002#if defined(HOST_WORDS_BIGENDIAN)
1003#define MRGHI 0
1004#define MRGLO 1
1005#else
1006#define MRGHI 1
1007#define MRGLO 0
1008#endif
1009#define VMRG(suffix, element)                   \
1010    VMRG_DO(mrgl##suffix, element, MRGHI)       \
1011    VMRG_DO(mrgh##suffix, element, MRGLO)
1012VMRG(b, u8)
1013VMRG(h, u16)
1014VMRG(w, u32)
1015#undef VMRG_DO
1016#undef VMRG
1017#undef MRGHI
1018#undef MRGLO
1019
1020void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1021                     ppc_avr_t *b, ppc_avr_t *c)
1022{
1023    int32_t prod[16];
1024    int i;
1025
1026    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
1027        prod[i] = (int32_t)a->s8[i] * b->u8[i];
1028    }
1029
1030    VECTOR_FOR_INORDER_I(i, s32) {
1031        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
1032            prod[4 * i + 2] + prod[4 * i + 3];
1033    }
1034}
1035
1036void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1037                     ppc_avr_t *b, ppc_avr_t *c)
1038{
1039    int32_t prod[8];
1040    int i;
1041
1042    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1043        prod[i] = a->s16[i] * b->s16[i];
1044    }
1045
1046    VECTOR_FOR_INORDER_I(i, s32) {
1047        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1048    }
1049}
1050
1051void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1052                     ppc_avr_t *b, ppc_avr_t *c)
1053{
1054    int32_t prod[8];
1055    int i;
1056    int sat = 0;
1057
1058    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1059        prod[i] = (int32_t)a->s16[i] * b->s16[i];
1060    }
1061
1062    VECTOR_FOR_INORDER_I(i, s32) {
1063        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1064
1065        r->u32[i] = cvtsdsw(t, &sat);
1066    }
1067
1068    if (sat) {
1069        env->vscr |= (1 << VSCR_SAT);
1070    }
1071}
1072
1073void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1074                     ppc_avr_t *b, ppc_avr_t *c)
1075{
1076    uint16_t prod[16];
1077    int i;
1078
1079    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1080        prod[i] = a->u8[i] * b->u8[i];
1081    }
1082
1083    VECTOR_FOR_INORDER_I(i, u32) {
1084        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1085            prod[4 * i + 2] + prod[4 * i + 3];
1086    }
1087}
1088
1089void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1090                     ppc_avr_t *b, ppc_avr_t *c)
1091{
1092    uint32_t prod[8];
1093    int i;
1094
1095    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1096        prod[i] = a->u16[i] * b->u16[i];
1097    }
1098
1099    VECTOR_FOR_INORDER_I(i, u32) {
1100        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1101    }
1102}
1103
1104void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1105                     ppc_avr_t *b, ppc_avr_t *c)
1106{
1107    uint32_t prod[8];
1108    int i;
1109    int sat = 0;
1110
1111    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1112        prod[i] = a->u16[i] * b->u16[i];
1113    }
1114
1115    VECTOR_FOR_INORDER_I(i, s32) {
1116        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1117
1118        r->u32[i] = cvtuduw(t, &sat);
1119    }
1120
1121    if (sat) {
1122        env->vscr |= (1 << VSCR_SAT);
1123    }
1124}
1125
1126#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
1127    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
1128    {                                                                   \
1129        int i;                                                          \
1130                                                                        \
1131        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
1132            if (evenp) {                                                \
1133                r->prod_element[i] =                                    \
1134                    (cast)a->mul_element[i * 2 + HI_IDX] *              \
1135                    (cast)b->mul_element[i * 2 + HI_IDX];               \
1136            } else {                                                    \
1137                r->prod_element[i] =                                    \
1138                    (cast)a->mul_element[i * 2 + LO_IDX] *              \
1139                    (cast)b->mul_element[i * 2 + LO_IDX];               \
1140            }                                                           \
1141        }                                                               \
1142    }
1143#define VMUL(suffix, mul_element, prod_element, cast)            \
1144    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)    \
1145    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
1146VMUL(sb, s8, s16, int16_t)
1147VMUL(sh, s16, s32, int32_t)
1148VMUL(sw, s32, s64, int64_t)
1149VMUL(ub, u8, u16, uint16_t)
1150VMUL(uh, u16, u32, uint32_t)
1151VMUL(uw, u32, u64, uint64_t)
1152#undef VMUL_DO
1153#undef VMUL
1154
1155void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1156                  ppc_avr_t *c)
1157{
1158    ppc_avr_t result;
1159    int i;
1160
1161    VECTOR_FOR_INORDER_I(i, u8) {
1162        int s = c->u8[i] & 0x1f;
1163#if defined(HOST_WORDS_BIGENDIAN)
1164        int index = s & 0xf;
1165#else
1166        int index = 15 - (s & 0xf);
1167#endif
1168
1169        if (s & 0x10) {
1170            result.u8[i] = b->u8[index];
1171        } else {
1172            result.u8[i] = a->u8[index];
1173        }
1174    }
1175    *r = result;
1176}
1177
1178void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1179                  ppc_avr_t *c)
1180{
1181    ppc_avr_t result;
1182    int i;
1183
1184    VECTOR_FOR_INORDER_I(i, u8) {
1185        int s = c->u8[i] & 0x1f;
1186#if defined(HOST_WORDS_BIGENDIAN)
1187        int index = 15 - (s & 0xf);
1188#else
1189        int index = s & 0xf;
1190#endif
1191
1192        if (s & 0x10) {
1193            result.u8[i] = a->u8[index];
1194        } else {
1195            result.u8[i] = b->u8[index];
1196        }
1197    }
1198    *r = result;
1199}
1200
1201#if defined(HOST_WORDS_BIGENDIAN)
1202#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1203#define VBPERMD_INDEX(i) (i)
1204#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1205#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
1206#else
1207#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
1208#define VBPERMD_INDEX(i) (1 - i)
1209#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1210#define EXTRACT_BIT(avr, i, index) \
1211        (extract64((avr)->u64[1 - i], 63 - index, 1))
1212#endif
1213
1214void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1215{
1216    int i, j;
1217    ppc_avr_t result = { .u64 = { 0, 0 } };
1218    VECTOR_FOR_INORDER_I(i, u64) {
1219        for (j = 0; j < 8; j++) {
1220            int index = VBPERMQ_INDEX(b, (i * 8) + j);
1221            if (index < 64 && EXTRACT_BIT(a, i, index)) {
1222                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1223            }
1224        }
1225    }
1226    *r = result;
1227}
1228
1229void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1230{
1231    int i;
1232    uint64_t perm = 0;
1233
1234    VECTOR_FOR_INORDER_I(i, u8) {
1235        int index = VBPERMQ_INDEX(b, i);
1236
1237        if (index < 128) {
1238            uint64_t mask = (1ull << (63-(index & 0x3F)));
1239            if (a->u64[VBPERMQ_DW(index)] & mask) {
1240                perm |= (0x8000 >> i);
1241            }
1242        }
1243    }
1244
1245    r->u64[HI_IDX] = perm;
1246    r->u64[LO_IDX] = 0;
1247}
1248
1249#undef VBPERMQ_INDEX
1250#undef VBPERMQ_DW
1251
1252static const uint64_t VGBBD_MASKS[256] = {
1253    0x0000000000000000ull, /* 00 */
1254    0x0000000000000080ull, /* 01 */
1255    0x0000000000008000ull, /* 02 */
1256    0x0000000000008080ull, /* 03 */
1257    0x0000000000800000ull, /* 04 */
1258    0x0000000000800080ull, /* 05 */
1259    0x0000000000808000ull, /* 06 */
1260    0x0000000000808080ull, /* 07 */
1261    0x0000000080000000ull, /* 08 */
1262    0x0000000080000080ull, /* 09 */
1263    0x0000000080008000ull, /* 0A */
1264    0x0000000080008080ull, /* 0B */
1265    0x0000000080800000ull, /* 0C */
1266    0x0000000080800080ull, /* 0D */
1267    0x0000000080808000ull, /* 0E */
1268    0x0000000080808080ull, /* 0F */
1269    0x0000008000000000ull, /* 10 */
1270    0x0000008000000080ull, /* 11 */
1271    0x0000008000008000ull, /* 12 */
1272    0x0000008000008080ull, /* 13 */
1273    0x0000008000800000ull, /* 14 */
1274    0x0000008000800080ull, /* 15 */
1275    0x0000008000808000ull, /* 16 */
1276    0x0000008000808080ull, /* 17 */
1277    0x0000008080000000ull, /* 18 */
1278    0x0000008080000080ull, /* 19 */
1279    0x0000008080008000ull, /* 1A */
1280    0x0000008080008080ull, /* 1B */
1281    0x0000008080800000ull, /* 1C */
1282    0x0000008080800080ull, /* 1D */
1283    0x0000008080808000ull, /* 1E */
1284    0x0000008080808080ull, /* 1F */
1285    0x0000800000000000ull, /* 20 */
1286    0x0000800000000080ull, /* 21 */
1287    0x0000800000008000ull, /* 22 */
1288    0x0000800000008080ull, /* 23 */
1289    0x0000800000800000ull, /* 24 */
1290    0x0000800000800080ull, /* 25 */
1291    0x0000800000808000ull, /* 26 */
1292    0x0000800000808080ull, /* 27 */
1293    0x0000800080000000ull, /* 28 */
1294    0x0000800080000080ull, /* 29 */
1295    0x0000800080008000ull, /* 2A */
1296    0x0000800080008080ull, /* 2B */
1297    0x0000800080800000ull, /* 2C */
1298    0x0000800080800080ull, /* 2D */
1299    0x0000800080808000ull, /* 2E */
1300    0x0000800080808080ull, /* 2F */
1301    0x0000808000000000ull, /* 30 */
1302    0x0000808000000080ull, /* 31 */
1303    0x0000808000008000ull, /* 32 */
1304    0x0000808000008080ull, /* 33 */
1305    0x0000808000800000ull, /* 34 */
1306    0x0000808000800080ull, /* 35 */
1307    0x0000808000808000ull, /* 36 */
1308    0x0000808000808080ull, /* 37 */
1309    0x0000808080000000ull, /* 38 */
1310    0x0000808080000080ull, /* 39 */
1311    0x0000808080008000ull, /* 3A */
1312    0x0000808080008080ull, /* 3B */
1313    0x0000808080800000ull, /* 3C */
1314    0x0000808080800080ull, /* 3D */
1315    0x0000808080808000ull, /* 3E */
1316    0x0000808080808080ull, /* 3F */
1317    0x0080000000000000ull, /* 40 */
1318    0x0080000000000080ull, /* 41 */
1319    0x0080000000008000ull, /* 42 */
1320    0x0080000000008080ull, /* 43 */
1321    0x0080000000800000ull, /* 44 */
1322    0x0080000000800080ull, /* 45 */
1323    0x0080000000808000ull, /* 46 */
1324    0x0080000000808080ull, /* 47 */
1325    0x0080000080000000ull, /* 48 */
1326    0x0080000080000080ull, /* 49 */
1327    0x0080000080008000ull, /* 4A */
1328    0x0080000080008080ull, /* 4B */
1329    0x0080000080800000ull, /* 4C */
1330    0x0080000080800080ull, /* 4D */
1331    0x0080000080808000ull, /* 4E */
1332    0x0080000080808080ull, /* 4F */
1333    0x0080008000000000ull, /* 50 */
1334    0x0080008000000080ull, /* 51 */
1335    0x0080008000008000ull, /* 52 */
1336    0x0080008000008080ull, /* 53 */
1337    0x0080008000800000ull, /* 54 */
1338    0x0080008000800080ull, /* 55 */
1339    0x0080008000808000ull, /* 56 */
1340    0x0080008000808080ull, /* 57 */
1341    0x0080008080000000ull, /* 58 */
1342    0x0080008080000080ull, /* 59 */
1343    0x0080008080008000ull, /* 5A */
1344    0x0080008080008080ull, /* 5B */
1345    0x0080008080800000ull, /* 5C */
1346    0x0080008080800080ull, /* 5D */
1347    0x0080008080808000ull, /* 5E */
1348    0x0080008080808080ull, /* 5F */
1349    0x0080800000000000ull, /* 60 */
1350    0x0080800000000080ull, /* 61 */
1351    0x0080800000008000ull, /* 62 */
1352    0x0080800000008080ull, /* 63 */
1353    0x0080800000800000ull, /* 64 */
1354    0x0080800000800080ull, /* 65 */
1355    0x0080800000808000ull, /* 66 */
1356    0x0080800000808080ull, /* 67 */
1357    0x0080800080000000ull, /* 68 */
1358    0x0080800080000080ull, /* 69 */
1359    0x0080800080008000ull, /* 6A */
1360    0x0080800080008080ull, /* 6B */
1361    0x0080800080800000ull, /* 6C */
1362    0x0080800080800080ull, /* 6D */
1363    0x0080800080808000ull, /* 6E */
1364    0x0080800080808080ull, /* 6F */
1365    0x0080808000000000ull, /* 70 */
1366    0x0080808000000080ull, /* 71 */
1367    0x0080808000008000ull, /* 72 */
1368    0x0080808000008080ull, /* 73 */
1369    0x0080808000800000ull, /* 74 */
1370    0x0080808000800080ull, /* 75 */
1371    0x0080808000808000ull, /* 76 */
1372    0x0080808000808080ull, /* 77 */
1373    0x0080808080000000ull, /* 78 */
1374    0x0080808080000080ull, /* 79 */
1375    0x0080808080008000ull, /* 7A */
1376    0x0080808080008080ull, /* 7B */
1377    0x0080808080800000ull, /* 7C */
1378    0x0080808080800080ull, /* 7D */
1379    0x0080808080808000ull, /* 7E */
1380    0x0080808080808080ull, /* 7F */
1381    0x8000000000000000ull, /* 80 */
1382    0x8000000000000080ull, /* 81 */
1383    0x8000000000008000ull, /* 82 */
1384    0x8000000000008080ull, /* 83 */
1385    0x8000000000800000ull, /* 84 */
1386    0x8000000000800080ull, /* 85 */
1387    0x8000000000808000ull, /* 86 */
1388    0x8000000000808080ull, /* 87 */
1389    0x8000000080000000ull, /* 88 */
1390    0x8000000080000080ull, /* 89 */
1391    0x8000000080008000ull, /* 8A */
1392    0x8000000080008080ull, /* 8B */
1393    0x8000000080800000ull, /* 8C */
1394    0x8000000080800080ull, /* 8D */
1395    0x8000000080808000ull, /* 8E */
1396    0x8000000080808080ull, /* 8F */
1397    0x8000008000000000ull, /* 90 */
1398    0x8000008000000080ull, /* 91 */
1399    0x8000008000008000ull, /* 92 */
1400    0x8000008000008080ull, /* 93 */
1401    0x8000008000800000ull, /* 94 */
1402    0x8000008000800080ull, /* 95 */
1403    0x8000008000808000ull, /* 96 */
1404    0x8000008000808080ull, /* 97 */
1405    0x8000008080000000ull, /* 98 */
1406    0x8000008080000080ull, /* 99 */
1407    0x8000008080008000ull, /* 9A */
1408    0x8000008080008080ull, /* 9B */
1409    0x8000008080800000ull, /* 9C */
1410    0x8000008080800080ull, /* 9D */
1411    0x8000008080808000ull, /* 9E */
1412    0x8000008080808080ull, /* 9F */
1413    0x8000800000000000ull, /* A0 */
1414    0x8000800000000080ull, /* A1 */
1415    0x8000800000008000ull, /* A2 */
1416    0x8000800000008080ull, /* A3 */
1417    0x8000800000800000ull, /* A4 */
1418    0x8000800000800080ull, /* A5 */
1419    0x8000800000808000ull, /* A6 */
1420    0x8000800000808080ull, /* A7 */
1421    0x8000800080000000ull, /* A8 */
1422    0x8000800080000080ull, /* A9 */
1423    0x8000800080008000ull, /* AA */
1424    0x8000800080008080ull, /* AB */
1425    0x8000800080800000ull, /* AC */
1426    0x8000800080800080ull, /* AD */
1427    0x8000800080808000ull, /* AE */
1428    0x8000800080808080ull, /* AF */
1429    0x8000808000000000ull, /* B0 */
1430    0x8000808000000080ull, /* B1 */
1431    0x8000808000008000ull, /* B2 */
1432    0x8000808000008080ull, /* B3 */
1433    0x8000808000800000ull, /* B4 */
1434    0x8000808000800080ull, /* B5 */
1435    0x8000808000808000ull, /* B6 */
1436    0x8000808000808080ull, /* B7 */
1437    0x8000808080000000ull, /* B8 */
1438    0x8000808080000080ull, /* B9 */
1439    0x8000808080008000ull, /* BA */
1440    0x8000808080008080ull, /* BB */
1441    0x8000808080800000ull, /* BC */
1442    0x8000808080800080ull, /* BD */
1443    0x8000808080808000ull, /* BE */
1444    0x8000808080808080ull, /* BF */
1445    0x8080000000000000ull, /* C0 */
1446    0x8080000000000080ull, /* C1 */
1447    0x8080000000008000ull, /* C2 */
1448    0x8080000000008080ull, /* C3 */
1449    0x8080000000800000ull, /* C4 */
1450    0x8080000000800080ull, /* C5 */
1451    0x8080000000808000ull, /* C6 */
1452    0x8080000000808080ull, /* C7 */
1453    0x8080000080000000ull, /* C8 */
1454    0x8080000080000080ull, /* C9 */
1455    0x8080000080008000ull, /* CA */
1456    0x8080000080008080ull, /* CB */
1457    0x8080000080800000ull, /* CC */
1458    0x8080000080800080ull, /* CD */
1459    0x8080000080808000ull, /* CE */
1460    0x8080000080808080ull, /* CF */
1461    0x8080008000000000ull, /* D0 */
1462    0x8080008000000080ull, /* D1 */
1463    0x8080008000008000ull, /* D2 */
1464    0x8080008000008080ull, /* D3 */
1465    0x8080008000800000ull, /* D4 */
1466    0x8080008000800080ull, /* D5 */
1467    0x8080008000808000ull, /* D6 */
1468    0x8080008000808080ull, /* D7 */
1469    0x8080008080000000ull, /* D8 */
1470    0x8080008080000080ull, /* D9 */
1471    0x8080008080008000ull, /* DA */
1472    0x8080008080008080ull, /* DB */
1473    0x8080008080800000ull, /* DC */
1474    0x8080008080800080ull, /* DD */
1475    0x8080008080808000ull, /* DE */
1476    0x8080008080808080ull, /* DF */
1477    0x8080800000000000ull, /* E0 */
1478    0x8080800000000080ull, /* E1 */
1479    0x8080800000008000ull, /* E2 */
1480    0x8080800000008080ull, /* E3 */
1481    0x8080800000800000ull, /* E4 */
1482    0x8080800000800080ull, /* E5 */
1483    0x8080800000808000ull, /* E6 */
1484    0x8080800000808080ull, /* E7 */
1485    0x8080800080000000ull, /* E8 */
1486    0x8080800080000080ull, /* E9 */
1487    0x8080800080008000ull, /* EA */
1488    0x8080800080008080ull, /* EB */
1489    0x8080800080800000ull, /* EC */
1490    0x8080800080800080ull, /* ED */
1491    0x8080800080808000ull, /* EE */
1492    0x8080800080808080ull, /* EF */
1493    0x8080808000000000ull, /* F0 */
1494    0x8080808000000080ull, /* F1 */
1495    0x8080808000008000ull, /* F2 */
1496    0x8080808000008080ull, /* F3 */
1497    0x8080808000800000ull, /* F4 */
1498    0x8080808000800080ull, /* F5 */
1499    0x8080808000808000ull, /* F6 */
1500    0x8080808000808080ull, /* F7 */
1501    0x8080808080000000ull, /* F8 */
1502    0x8080808080000080ull, /* F9 */
1503    0x8080808080008000ull, /* FA */
1504    0x8080808080008080ull, /* FB */
1505    0x8080808080800000ull, /* FC */
1506    0x8080808080800080ull, /* FD */
1507    0x8080808080808000ull, /* FE */
1508    0x8080808080808080ull, /* FF */
1509};
1510
1511void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1512{
1513    int i;
1514    uint64_t t[2] = { 0, 0 };
1515
1516    VECTOR_FOR_INORDER_I(i, u8) {
1517#if defined(HOST_WORDS_BIGENDIAN)
1518        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1519#else
1520        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1521#endif
1522    }
1523
1524    r->u64[0] = t[0];
1525    r->u64[1] = t[1];
1526}
1527
1528#define PMSUM(name, srcfld, trgfld, trgtyp)                   \
1529void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
1530{                                                             \
1531    int i, j;                                                 \
1532    trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])];      \
1533                                                              \
1534    VECTOR_FOR_INORDER_I(i, srcfld) {                         \
1535        prod[i] = 0;                                          \
1536        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
1537            if (a->srcfld[i] & (1ull<<j)) {                   \
1538                prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
1539            }                                                 \
1540        }                                                     \
1541    }                                                         \
1542                                                              \
1543    VECTOR_FOR_INORDER_I(i, trgfld) {                         \
1544        r->trgfld[i] = prod[2*i] ^ prod[2*i+1];               \
1545    }                                                         \
1546}
1547
1548PMSUM(vpmsumb, u8, u16, uint16_t)
1549PMSUM(vpmsumh, u16, u32, uint32_t)
1550PMSUM(vpmsumw, u32, u64, uint64_t)
1551
1552void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1553{
1554
1555#ifdef CONFIG_INT128
1556    int i, j;
1557    __uint128_t prod[2];
1558
1559    VECTOR_FOR_INORDER_I(i, u64) {
1560        prod[i] = 0;
1561        for (j = 0; j < 64; j++) {
1562            if (a->u64[i] & (1ull<<j)) {
1563                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1564            }
1565        }
1566    }
1567
1568    r->u128 = prod[0] ^ prod[1];
1569
1570#else
1571    int i, j;
1572    ppc_avr_t prod[2];
1573
1574    VECTOR_FOR_INORDER_I(i, u64) {
1575        prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1576        for (j = 0; j < 64; j++) {
1577            if (a->u64[i] & (1ull<<j)) {
1578                ppc_avr_t bshift;
1579                if (j == 0) {
1580                    bshift.u64[HI_IDX] = 0;
1581                    bshift.u64[LO_IDX] = b->u64[i];
1582                } else {
1583                    bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1584                    bshift.u64[LO_IDX] = b->u64[i] << j;
1585                }
1586                prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1587                prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1588            }
1589        }
1590    }
1591
1592    r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1593    r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1594#endif
1595}
1596
1597
1598#if defined(HOST_WORDS_BIGENDIAN)
1599#define PKBIG 1
1600#else
1601#define PKBIG 0
1602#endif
1603void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1604{
1605    int i, j;
1606    ppc_avr_t result;
1607#if defined(HOST_WORDS_BIGENDIAN)
1608    const ppc_avr_t *x[2] = { a, b };
1609#else
1610    const ppc_avr_t *x[2] = { b, a };
1611#endif
1612
1613    VECTOR_FOR_INORDER_I(i, u64) {
1614        VECTOR_FOR_INORDER_I(j, u32) {
1615            uint32_t e = x[i]->u32[j];
1616
1617            result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1618                                 ((e >> 6) & 0x3e0) |
1619                                 ((e >> 3) & 0x1f));
1620        }
1621    }
1622    *r = result;
1623}
1624
1625#define VPK(suffix, from, to, cvt, dosat)                               \
1626    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
1627                            ppc_avr_t *a, ppc_avr_t *b)                 \
1628    {                                                                   \
1629        int i;                                                          \
1630        int sat = 0;                                                    \
1631        ppc_avr_t result;                                               \
1632        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
1633        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
1634                                                                        \
1635        VECTOR_FOR_INORDER_I(i, from) {                                 \
1636            result.to[i] = cvt(a0->from[i], &sat);                      \
1637            result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
1638        }                                                               \
1639        *r = result;                                                    \
1640        if (dosat && sat) {                                             \
1641            env->vscr |= (1 << VSCR_SAT);                               \
1642        }                                                               \
1643    }
1644#define I(x, y) (x)
1645VPK(shss, s16, s8, cvtshsb, 1)
1646VPK(shus, s16, u8, cvtshub, 1)
1647VPK(swss, s32, s16, cvtswsh, 1)
1648VPK(swus, s32, u16, cvtswuh, 1)
1649VPK(sdss, s64, s32, cvtsdsw, 1)
1650VPK(sdus, s64, u32, cvtsduw, 1)
1651VPK(uhus, u16, u8, cvtuhub, 1)
1652VPK(uwus, u32, u16, cvtuwuh, 1)
1653VPK(udus, u64, u32, cvtuduw, 1)
1654VPK(uhum, u16, u8, I, 0)
1655VPK(uwum, u32, u16, I, 0)
1656VPK(udum, u64, u32, I, 0)
1657#undef I
1658#undef VPK
1659#undef PKBIG
1660
1661void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1662{
1663    int i;
1664
1665    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1666        r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1667    }
1668}
1669
1670#define VRFI(suffix, rounding)                                  \
1671    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
1672                             ppc_avr_t *b)                      \
1673    {                                                           \
1674        int i;                                                  \
1675        float_status s = env->vec_status;                       \
1676                                                                \
1677        set_float_rounding_mode(rounding, &s);                  \
1678        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
1679            r->f[i] = float32_round_to_int (b->f[i], &s);       \
1680        }                                                       \
1681    }
1682VRFI(n, float_round_nearest_even)
1683VRFI(m, float_round_down)
1684VRFI(p, float_round_up)
1685VRFI(z, float_round_to_zero)
1686#undef VRFI
1687
1688#define VROTATE(suffix, element, mask)                                  \
1689    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1690    {                                                                   \
1691        int i;                                                          \
1692                                                                        \
1693        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1694            unsigned int shift = b->element[i] & mask;                  \
1695            r->element[i] = (a->element[i] << shift) |                  \
1696                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1697        }                                                               \
1698    }
1699VROTATE(b, u8, 0x7)
1700VROTATE(h, u16, 0xF)
1701VROTATE(w, u32, 0x1F)
1702VROTATE(d, u64, 0x3F)
1703#undef VROTATE
1704
1705void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1706{
1707    int i;
1708
1709    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1710        float32 t = float32_sqrt(b->f[i], &env->vec_status);
1711
1712        r->f[i] = float32_div(float32_one, t, &env->vec_status);
1713    }
1714}
1715
1716#define VRLMI(name, size, element, insert)                            \
1717void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)          \
1718{                                                                     \
1719    int i;                                                            \
1720    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
1721        uint##size##_t src1 = a->element[i];                          \
1722        uint##size##_t src2 = b->element[i];                          \
1723        uint##size##_t src3 = r->element[i];                          \
1724        uint##size##_t begin, end, shift, mask, rot_val;              \
1725                                                                      \
1726        shift = extract##size(src2, 0, 6);                            \
1727        end   = extract##size(src2, 8, 6);                            \
1728        begin = extract##size(src2, 16, 6);                           \
1729        rot_val = rol##size(src1, shift);                             \
1730        mask = mask_u##size(begin, end);                              \
1731        if (insert) {                                                 \
1732            r->element[i] = (rot_val & mask) | (src3 & ~mask);        \
1733        } else {                                                      \
1734            r->element[i] = (rot_val & mask);                         \
1735        }                                                             \
1736    }                                                                 \
1737}
1738
1739VRLMI(vrldmi, 64, u64, 1);
1740VRLMI(vrlwmi, 32, u32, 1);
1741VRLMI(vrldnm, 64, u64, 0);
1742VRLMI(vrlwnm, 32, u32, 0);
1743
1744void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1745                 ppc_avr_t *c)
1746{
1747    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1748    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1749}
1750
1751void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1752{
1753    int i;
1754
1755    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1756        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1757    }
1758}
1759
1760void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1761{
1762    int i;
1763
1764    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1765        r->f[i] = float32_log2(b->f[i], &env->vec_status);
1766    }
1767}
1768
1769#if defined(HOST_WORDS_BIGENDIAN)
1770#define VEXTU_X_DO(name, size, left)                                \
1771    target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
1772    {                                                               \
1773        int index;                                                  \
1774        if (left) {                                                 \
1775            index = (a & 0xf) * 8;                                  \
1776        } else {                                                    \
1777            index = ((15 - (a & 0xf) + 1) * 8) - size;              \
1778        }                                                           \
1779        return int128_getlo(int128_rshift(b->s128, index)) &        \
1780            MAKE_64BIT_MASK(0, size);                               \
1781    }
1782#else
1783#define VEXTU_X_DO(name, size, left)                                \
1784    target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
1785    {                                                               \
1786        int index;                                                  \
1787        if (left) {                                                 \
1788            index = ((15 - (a & 0xf) + 1) * 8) - size;              \
1789        } else {                                                    \
1790            index = (a & 0xf) * 8;                                  \
1791        }                                                           \
1792        return int128_getlo(int128_rshift(b->s128, index)) &        \
1793            MAKE_64BIT_MASK(0, size);                               \
1794    }
1795#endif
1796
1797VEXTU_X_DO(vextublx,  8, 1)
1798VEXTU_X_DO(vextuhlx, 16, 1)
1799VEXTU_X_DO(vextuwlx, 32, 1)
1800VEXTU_X_DO(vextubrx,  8, 0)
1801VEXTU_X_DO(vextuhrx, 16, 0)
1802VEXTU_X_DO(vextuwrx, 32, 0)
1803#undef VEXTU_X_DO
1804
1805/* The specification says that the results are undefined if all of the
1806 * shift counts are not identical.  We check to make sure that they are
1807 * to conform to what real hardware appears to do.  */
1808#define VSHIFT(suffix, leftp)                                           \
1809    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
1810    {                                                                   \
1811        int shift = b->u8[LO_IDX*15] & 0x7;                             \
1812        int doit = 1;                                                   \
1813        int i;                                                          \
1814                                                                        \
1815        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
1816            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
1817        }                                                               \
1818        if (doit) {                                                     \
1819            if (shift == 0) {                                           \
1820                *r = *a;                                                \
1821            } else if (leftp) {                                         \
1822                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
1823                                                                        \
1824                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
1825                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
1826            } else {                                                    \
1827                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
1828                                                                        \
1829                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
1830                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
1831            }                                                           \
1832        }                                                               \
1833    }
1834VSHIFT(l, 1)
1835VSHIFT(r, 0)
1836#undef VSHIFT
1837
1838#define VSL(suffix, element, mask)                                      \
1839    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1840    {                                                                   \
1841        int i;                                                          \
1842                                                                        \
1843        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1844            unsigned int shift = b->element[i] & mask;                  \
1845                                                                        \
1846            r->element[i] = a->element[i] << shift;                     \
1847        }                                                               \
1848    }
1849VSL(b, u8, 0x7)
1850VSL(h, u16, 0x0F)
1851VSL(w, u32, 0x1F)
1852VSL(d, u64, 0x3F)
1853#undef VSL
1854
1855void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1856{
1857    int i;
1858    unsigned int shift, bytes, size;
1859
1860    size = ARRAY_SIZE(r->u8);
1861    for (i = 0; i < size; i++) {
1862        shift = b->u8[i] & 0x7;             /* extract shift value */
1863        bytes = (a->u8[i] << 8) +             /* extract adjacent bytes */
1864            (((i + 1) < size) ? a->u8[i + 1] : 0);
1865        r->u8[i] = (bytes << shift) >> 8;   /* shift and store result */
1866    }
1867}
1868
1869void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1870{
1871    int i;
1872    unsigned int shift, bytes;
1873
1874    /* Use reverse order, as destination and source register can be same. Its
1875     * being modified in place saving temporary, reverse order will guarantee
1876     * that computed result is not fed back.
1877     */
1878    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1879        shift = b->u8[i] & 0x7;                 /* extract shift value */
1880        bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
1881                                                /* extract adjacent bytes */
1882        r->u8[i] = (bytes >> shift) & 0xFF;     /* shift and store result */
1883    }
1884}
1885
1886void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1887{
1888    int sh = shift & 0xf;
1889    int i;
1890    ppc_avr_t result;
1891
1892#if defined(HOST_WORDS_BIGENDIAN)
1893    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1894        int index = sh + i;
1895        if (index > 0xf) {
1896            result.u8[i] = b->u8[index - 0x10];
1897        } else {
1898            result.u8[i] = a->u8[index];
1899        }
1900    }
1901#else
1902    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1903        int index = (16 - sh) + i;
1904        if (index > 0xf) {
1905            result.u8[i] = a->u8[index - 0x10];
1906        } else {
1907            result.u8[i] = b->u8[index];
1908        }
1909    }
1910#endif
1911    *r = result;
1912}
1913
1914void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1915{
1916    int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1917
1918#if defined(HOST_WORDS_BIGENDIAN)
1919    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1920    memset(&r->u8[16-sh], 0, sh);
1921#else
1922    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1923    memset(&r->u8[0], 0, sh);
1924#endif
1925}
1926
1927/* Experimental testing shows that hardware masks the immediate.  */
1928#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1929#if defined(HOST_WORDS_BIGENDIAN)
1930#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1931#else
1932#define SPLAT_ELEMENT(element)                                  \
1933    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1934#endif
1935#define VSPLT(suffix, element)                                          \
1936    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1937    {                                                                   \
1938        uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
1939        int i;                                                          \
1940                                                                        \
1941        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1942            r->element[i] = s;                                          \
1943        }                                                               \
1944    }
1945VSPLT(b, u8)
1946VSPLT(h, u16)
1947VSPLT(w, u32)
1948#undef VSPLT
1949#undef SPLAT_ELEMENT
1950#undef _SPLAT_MASKED
1951#if defined(HOST_WORDS_BIGENDIAN)
1952#define VINSERT(suffix, element)                                            \
1953    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1954    {                                                                       \
1955        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element)],              \
1956               sizeof(r->element[0]));                                      \
1957    }
1958#else
1959#define VINSERT(suffix, element)                                            \
1960    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1961    {                                                                       \
1962        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
1963        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
1964    }
1965#endif
1966VINSERT(b, u8)
1967VINSERT(h, u16)
1968VINSERT(w, u32)
1969VINSERT(d, u64)
1970#undef VINSERT
1971#if defined(HOST_WORDS_BIGENDIAN)
1972#define VEXTRACT(suffix, element)                                            \
1973    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1974    {                                                                        \
1975        uint32_t es = sizeof(r->element[0]);                                 \
1976        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
1977        memset(&r->u8[8], 0, 8);                                             \
1978        memset(&r->u8[0], 0, 8 - es);                                        \
1979    }
1980#else
1981#define VEXTRACT(suffix, element)                                            \
1982    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1983    {                                                                        \
1984        uint32_t es = sizeof(r->element[0]);                                 \
1985        uint32_t s = (16 - index) - es;                                      \
1986        memmove(&r->u8[8], &b->u8[s], es);                                   \
1987        memset(&r->u8[0], 0, 8);                                             \
1988        memset(&r->u8[8 + es], 0, 8 - es);                                   \
1989    }
1990#endif
1991VEXTRACT(ub, u8)
1992VEXTRACT(uh, u16)
1993VEXTRACT(uw, u32)
1994VEXTRACT(d, u64)
1995#undef VEXTRACT
1996
1997void helper_xxextractuw(CPUPPCState *env, target_ulong xtn,
1998                        target_ulong xbn, uint32_t index)
1999{
2000    ppc_vsr_t xt, xb;
2001    size_t es = sizeof(uint32_t);
2002    uint32_t ext_index;
2003    int i;
2004
2005    getVSR(xbn, &xb, env);
2006    memset(&xt, 0, sizeof(xt));
2007
2008#if defined(HOST_WORDS_BIGENDIAN)
2009    ext_index = index;
2010    for (i = 0; i < es; i++, ext_index++) {
2011        xt.u8[8 - es + i] = xb.u8[ext_index % 16];
2012    }
2013#else
2014    ext_index = 15 - index;
2015    for (i = es - 1; i >= 0; i--, ext_index--) {
2016        xt.u8[8 + i] = xb.u8[ext_index % 16];
2017    }
2018#endif
2019
2020    putVSR(xtn, &xt, env);
2021}
2022
2023void helper_xxinsertw(CPUPPCState *env, target_ulong xtn,
2024                      target_ulong xbn, uint32_t index)
2025{
2026    ppc_vsr_t xt, xb;
2027    size_t es = sizeof(uint32_t);
2028    int ins_index, i = 0;
2029
2030    getVSR(xbn, &xb, env);
2031    getVSR(xtn, &xt, env);
2032
2033#if defined(HOST_WORDS_BIGENDIAN)
2034    ins_index = index;
2035    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
2036        xt.u8[ins_index] = xb.u8[8 - es + i];
2037    }
2038#else
2039    ins_index = 15 - index;
2040    for (i = es - 1; i >= 0 && ins_index >= 0; i--, ins_index--) {
2041        xt.u8[ins_index] = xb.u8[8 + i];
2042    }
2043#endif
2044
2045    putVSR(xtn, &xt, env);
2046}
2047
2048#define VEXT_SIGNED(name, element, mask, cast, recast)              \
2049void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
2050{                                                                   \
2051    int i;                                                          \
2052    VECTOR_FOR_INORDER_I(i, element) {                              \
2053        r->element[i] = (recast)((cast)(b->element[i] & mask));     \
2054    }                                                               \
2055}
2056VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
2057VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
2058VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
2059VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
2060VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
2061#undef VEXT_SIGNED
2062
2063#define VNEG(name, element)                                         \
2064void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
2065{                                                                   \
2066    int i;                                                          \
2067    VECTOR_FOR_INORDER_I(i, element) {                              \
2068        r->element[i] = -b->element[i];                             \
2069    }                                                               \
2070}
2071VNEG(vnegw, s32)
2072VNEG(vnegd, s64)
2073#undef VNEG
2074
2075#define VSPLTI(suffix, element, splat_type)                     \
2076    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
2077    {                                                           \
2078        splat_type x = (int8_t)(splat << 3) >> 3;               \
2079        int i;                                                  \
2080                                                                \
2081        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
2082            r->element[i] = x;                                  \
2083        }                                                       \
2084    }
2085VSPLTI(b, s8, int8_t)
2086VSPLTI(h, s16, int16_t)
2087VSPLTI(w, s32, int32_t)
2088#undef VSPLTI
2089
2090#define VSR(suffix, element, mask)                                      \
2091    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
2092    {                                                                   \
2093        int i;                                                          \
2094                                                                        \
2095        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
2096            unsigned int shift = b->element[i] & mask;                  \
2097            r->element[i] = a->element[i] >> shift;                     \
2098        }                                                               \
2099    }
2100VSR(ab, s8, 0x7)
2101VSR(ah, s16, 0xF)
2102VSR(aw, s32, 0x1F)
2103VSR(ad, s64, 0x3F)
2104VSR(b, u8, 0x7)
2105VSR(h, u16, 0xF)
2106VSR(w, u32, 0x1F)
2107VSR(d, u64, 0x3F)
2108#undef VSR
2109
2110void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2111{
2112    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
2113
2114#if defined(HOST_WORDS_BIGENDIAN)
2115    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
2116    memset(&r->u8[0], 0, sh);
2117#else
2118    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
2119    memset(&r->u8[16 - sh], 0, sh);
2120#endif
2121}
2122
2123void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2124{
2125    int i;
2126
2127    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2128        r->u32[i] = a->u32[i] >= b->u32[i];
2129    }
2130}
2131
2132void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2133{
2134    int64_t t;
2135    int i, upper;
2136    ppc_avr_t result;
2137    int sat = 0;
2138
2139#if defined(HOST_WORDS_BIGENDIAN)
2140    upper = ARRAY_SIZE(r->s32)-1;
2141#else
2142    upper = 0;
2143#endif
2144    t = (int64_t)b->s32[upper];
2145    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2146        t += a->s32[i];
2147        result.s32[i] = 0;
2148    }
2149    result.s32[upper] = cvtsdsw(t, &sat);
2150    *r = result;
2151
2152    if (sat) {
2153        env->vscr |= (1 << VSCR_SAT);
2154    }
2155}
2156
2157void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2158{
2159    int i, j, upper;
2160    ppc_avr_t result;
2161    int sat = 0;
2162
2163#if defined(HOST_WORDS_BIGENDIAN)
2164    upper = 1;
2165#else
2166    upper = 0;
2167#endif
2168    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2169        int64_t t = (int64_t)b->s32[upper + i * 2];
2170
2171        result.u64[i] = 0;
2172        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2173            t += a->s32[2 * i + j];
2174        }
2175        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
2176    }
2177
2178    *r = result;
2179    if (sat) {
2180        env->vscr |= (1 << VSCR_SAT);
2181    }
2182}
2183
2184void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2185{
2186    int i, j;
2187    int sat = 0;
2188
2189    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2190        int64_t t = (int64_t)b->s32[i];
2191
2192        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2193            t += a->s8[4 * i + j];
2194        }
2195        r->s32[i] = cvtsdsw(t, &sat);
2196    }
2197
2198    if (sat) {
2199        env->vscr |= (1 << VSCR_SAT);
2200    }
2201}
2202
2203void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2204{
2205    int sat = 0;
2206    int i;
2207
2208    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2209        int64_t t = (int64_t)b->s32[i];
2210
2211        t += a->s16[2 * i] + a->s16[2 * i + 1];
2212        r->s32[i] = cvtsdsw(t, &sat);
2213    }
2214
2215    if (sat) {
2216        env->vscr |= (1 << VSCR_SAT);
2217    }
2218}
2219
2220void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2221{
2222    int i, j;
2223    int sat = 0;
2224
2225    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2226        uint64_t t = (uint64_t)b->u32[i];
2227
2228        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2229            t += a->u8[4 * i + j];
2230        }
2231        r->u32[i] = cvtuduw(t, &sat);
2232    }
2233
2234    if (sat) {
2235        env->vscr |= (1 << VSCR_SAT);
2236    }
2237}
2238
2239#if defined(HOST_WORDS_BIGENDIAN)
2240#define UPKHI 1
2241#define UPKLO 0
2242#else
2243#define UPKHI 0
2244#define UPKLO 1
2245#endif
2246#define VUPKPX(suffix, hi)                                              \
2247    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
2248    {                                                                   \
2249        int i;                                                          \
2250        ppc_avr_t result;                                               \
2251                                                                        \
2252        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
2253            uint16_t e = b->u16[hi ? i : i+4];                          \
2254            uint8_t a = (e >> 15) ? 0xff : 0;                           \
2255            uint8_t r = (e >> 10) & 0x1f;                               \
2256            uint8_t g = (e >> 5) & 0x1f;                                \
2257            uint8_t b = e & 0x1f;                                       \
2258                                                                        \
2259            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
2260        }                                                               \
2261        *r = result;                                                    \
2262    }
2263VUPKPX(lpx, UPKLO)
2264VUPKPX(hpx, UPKHI)
2265#undef VUPKPX
2266
2267#define VUPK(suffix, unpacked, packee, hi)                              \
2268    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
2269    {                                                                   \
2270        int i;                                                          \
2271        ppc_avr_t result;                                               \
2272                                                                        \
2273        if (hi) {                                                       \
2274            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
2275                result.unpacked[i] = b->packee[i];                      \
2276            }                                                           \
2277        } else {                                                        \
2278            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2279                 i++) {                                                 \
2280                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2281            }                                                           \
2282        }                                                               \
2283        *r = result;                                                    \
2284    }
2285VUPK(hsb, s16, s8, UPKHI)
2286VUPK(hsh, s32, s16, UPKHI)
2287VUPK(hsw, s64, s32, UPKHI)
2288VUPK(lsb, s16, s8, UPKLO)
2289VUPK(lsh, s32, s16, UPKLO)
2290VUPK(lsw, s64, s32, UPKLO)
2291#undef VUPK
2292#undef UPKHI
2293#undef UPKLO
2294
2295#define VGENERIC_DO(name, element)                                      \
2296    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
2297    {                                                                   \
2298        int i;                                                          \
2299                                                                        \
2300        VECTOR_FOR_INORDER_I(i, element) {                              \
2301            r->element[i] = name(b->element[i]);                        \
2302        }                                                               \
2303    }
2304
2305#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2306#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2307#define clzw(v) clz32((v))
2308#define clzd(v) clz64((v))
2309
2310VGENERIC_DO(clzb, u8)
2311VGENERIC_DO(clzh, u16)
2312VGENERIC_DO(clzw, u32)
2313VGENERIC_DO(clzd, u64)
2314
2315#undef clzb
2316#undef clzh
2317#undef clzw
2318#undef clzd
2319
2320#define ctzb(v) ((v) ? ctz32(v) : 8)
2321#define ctzh(v) ((v) ? ctz32(v) : 16)
2322#define ctzw(v) ctz32((v))
2323#define ctzd(v) ctz64((v))
2324
2325VGENERIC_DO(ctzb, u8)
2326VGENERIC_DO(ctzh, u16)
2327VGENERIC_DO(ctzw, u32)
2328VGENERIC_DO(ctzd, u64)
2329
2330#undef ctzb
2331#undef ctzh
2332#undef ctzw
2333#undef ctzd
2334
2335#define popcntb(v) ctpop8(v)
2336#define popcnth(v) ctpop16(v)
2337#define popcntw(v) ctpop32(v)
2338#define popcntd(v) ctpop64(v)
2339
2340VGENERIC_DO(popcntb, u8)
2341VGENERIC_DO(popcnth, u16)
2342VGENERIC_DO(popcntw, u32)
2343VGENERIC_DO(popcntd, u64)
2344
2345#undef popcntb
2346#undef popcnth
2347#undef popcntw
2348#undef popcntd
2349
2350#undef VGENERIC_DO
2351
2352#if defined(HOST_WORDS_BIGENDIAN)
2353#define QW_ONE { .u64 = { 0, 1 } }
2354#else
2355#define QW_ONE { .u64 = { 1, 0 } }
2356#endif
2357
2358#ifndef CONFIG_INT128
2359
2360static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2361{
2362    t->u64[0] = ~a.u64[0];
2363    t->u64[1] = ~a.u64[1];
2364}
2365
2366static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2367{
2368    if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
2369        return -1;
2370    } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
2371        return 1;
2372    } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
2373        return -1;
2374    } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
2375        return 1;
2376    } else {
2377        return 0;
2378    }
2379}
2380
2381static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2382{
2383    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2384    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2385                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2386}
2387
2388static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2389{
2390    ppc_avr_t not_a;
2391    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2392    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2393                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2394    avr_qw_not(&not_a, a);
2395    return avr_qw_cmpu(not_a, b) < 0;
2396}
2397
2398#endif
2399
2400void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2401{
2402#ifdef CONFIG_INT128
2403    r->u128 = a->u128 + b->u128;
2404#else
2405    avr_qw_add(r, *a, *b);
2406#endif
2407}
2408
2409void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2410{
2411#ifdef CONFIG_INT128
2412    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2413#else
2414
2415    if (c->u64[LO_IDX] & 1) {
2416        ppc_avr_t tmp;
2417
2418        tmp.u64[HI_IDX] = 0;
2419        tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2420        avr_qw_add(&tmp, *a, tmp);
2421        avr_qw_add(r, tmp, *b);
2422    } else {
2423        avr_qw_add(r, *a, *b);
2424    }
2425#endif
2426}
2427
2428void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2429{
2430#ifdef CONFIG_INT128
2431    r->u128 = (~a->u128 < b->u128);
2432#else
2433    ppc_avr_t not_a;
2434
2435    avr_qw_not(&not_a, *a);
2436
2437    r->u64[HI_IDX] = 0;
2438    r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2439#endif
2440}
2441
2442void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2443{
2444#ifdef CONFIG_INT128
2445    int carry_out = (~a->u128 < b->u128);
2446    if (!carry_out && (c->u128 & 1)) {
2447        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2448                    ((a->u128 != 0) || (b->u128 != 0));
2449    }
2450    r->u128 = carry_out;
2451#else
2452
2453    int carry_in = c->u64[LO_IDX] & 1;
2454    int carry_out = 0;
2455    ppc_avr_t tmp;
2456
2457    carry_out = avr_qw_addc(&tmp, *a, *b);
2458
2459    if (!carry_out && carry_in) {
2460        ppc_avr_t one = QW_ONE;
2461        carry_out = avr_qw_addc(&tmp, tmp, one);
2462    }
2463    r->u64[HI_IDX] = 0;
2464    r->u64[LO_IDX] = carry_out;
2465#endif
2466}
2467
2468void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2469{
2470#ifdef CONFIG_INT128
2471    r->u128 = a->u128 - b->u128;
2472#else
2473    ppc_avr_t tmp;
2474    ppc_avr_t one = QW_ONE;
2475
2476    avr_qw_not(&tmp, *b);
2477    avr_qw_add(&tmp, *a, tmp);
2478    avr_qw_add(r, tmp, one);
2479#endif
2480}
2481
2482void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2483{
2484#ifdef CONFIG_INT128
2485    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2486#else
2487    ppc_avr_t tmp, sum;
2488
2489    avr_qw_not(&tmp, *b);
2490    avr_qw_add(&sum, *a, tmp);
2491
2492    tmp.u64[HI_IDX] = 0;
2493    tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2494    avr_qw_add(r, sum, tmp);
2495#endif
2496}
2497
2498void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2499{
2500#ifdef CONFIG_INT128
2501    r->u128 = (~a->u128 < ~b->u128) ||
2502                 (a->u128 + ~b->u128 == (__uint128_t)-1);
2503#else
2504    int carry = (avr_qw_cmpu(*a, *b) > 0);
2505    if (!carry) {
2506        ppc_avr_t tmp;
2507        avr_qw_not(&tmp, *b);
2508        avr_qw_add(&tmp, *a, tmp);
2509        carry = ((tmp.s64[HI_IDX] == -1ull) && (tmp.s64[LO_IDX] == -1ull));
2510    }
2511    r->u64[HI_IDX] = 0;
2512    r->u64[LO_IDX] = carry;
2513#endif
2514}
2515
2516void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2517{
2518#ifdef CONFIG_INT128
2519    r->u128 =
2520        (~a->u128 < ~b->u128) ||
2521        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2522#else
2523    int carry_in = c->u64[LO_IDX] & 1;
2524    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2525    if (!carry_out && carry_in) {
2526        ppc_avr_t tmp;
2527        avr_qw_not(&tmp, *b);
2528        avr_qw_add(&tmp, *a, tmp);
2529        carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2530    }
2531
2532    r->u64[HI_IDX] = 0;
2533    r->u64[LO_IDX] = carry_out;
2534#endif
2535}
2536
2537#define BCD_PLUS_PREF_1 0xC
2538#define BCD_PLUS_PREF_2 0xF
2539#define BCD_PLUS_ALT_1  0xA
2540#define BCD_NEG_PREF    0xD
2541#define BCD_NEG_ALT     0xB
2542#define BCD_PLUS_ALT_2  0xE
2543#define NATIONAL_PLUS   0x2B
2544#define NATIONAL_NEG    0x2D
2545
2546#if defined(HOST_WORDS_BIGENDIAN)
2547#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2548#else
2549#define BCD_DIG_BYTE(n) ((n) / 2)
2550#endif
2551
2552static int bcd_get_sgn(ppc_avr_t *bcd)
2553{
2554    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2555    case BCD_PLUS_PREF_1:
2556    case BCD_PLUS_PREF_2:
2557    case BCD_PLUS_ALT_1:
2558    case BCD_PLUS_ALT_2:
2559    {
2560        return 1;
2561    }
2562
2563    case BCD_NEG_PREF:
2564    case BCD_NEG_ALT:
2565    {
2566        return -1;
2567    }
2568
2569    default:
2570    {
2571        return 0;
2572    }
2573    }
2574}
2575
2576static int bcd_preferred_sgn(int sgn, int ps)
2577{
2578    if (sgn >= 0) {
2579        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2580    } else {
2581        return BCD_NEG_PREF;
2582    }
2583}
2584
2585static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2586{
2587    uint8_t result;
2588    if (n & 1) {
2589        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2590    } else {
2591       result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2592    }
2593
2594    if (unlikely(result > 9)) {
2595        *invalid = true;
2596    }
2597    return result;
2598}
2599
2600static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2601{
2602    if (n & 1) {
2603        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2604        bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2605    } else {
2606        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2607        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2608    }
2609}
2610
2611static bool bcd_is_valid(ppc_avr_t *bcd)
2612{
2613    int i;
2614    int invalid = 0;
2615
2616    if (bcd_get_sgn(bcd) == 0) {
2617        return false;
2618    }
2619
2620    for (i = 1; i < 32; i++) {
2621        bcd_get_digit(bcd, i, &invalid);
2622        if (unlikely(invalid)) {
2623            return false;
2624        }
2625    }
2626    return true;
2627}
2628
2629static int bcd_cmp_zero(ppc_avr_t *bcd)
2630{
2631    if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) {
2632        return CRF_EQ;
2633    } else {
2634        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2635    }
2636}
2637
2638static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2639{
2640#if defined(HOST_WORDS_BIGENDIAN)
2641    return reg->u16[7 - n];
2642#else
2643    return reg->u16[n];
2644#endif
2645}
2646
2647static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2648{
2649#if defined(HOST_WORDS_BIGENDIAN)
2650    reg->u16[7 - n] = val;
2651#else
2652    reg->u16[n] = val;
2653#endif
2654}
2655
2656static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2657{
2658    int i;
2659    int invalid = 0;
2660    for (i = 31; i > 0; i--) {
2661        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2662        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2663        if (unlikely(invalid)) {
2664            return 0; /* doesn't matter */
2665        } else if (dig_a > dig_b) {
2666            return 1;
2667        } else if (dig_a < dig_b) {
2668            return -1;
2669        }
2670    }
2671
2672    return 0;
2673}
2674
2675static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2676                       int *overflow)
2677{
2678    int carry = 0;
2679    int i;
2680    int is_zero = 1;
2681    for (i = 1; i <= 31; i++) {
2682        uint8_t digit = bcd_get_digit(a, i, invalid) +
2683                        bcd_get_digit(b, i, invalid) + carry;
2684        is_zero &= (digit == 0);
2685        if (digit > 9) {
2686            carry = 1;
2687            digit -= 10;
2688        } else {
2689            carry = 0;
2690        }
2691
2692        bcd_put_digit(t, digit, i);
2693
2694        if (unlikely(*invalid)) {
2695            return -1;
2696        }
2697    }
2698
2699    *overflow = carry;
2700    return is_zero;
2701}
2702
2703static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2704                       int *overflow)
2705{
2706    int carry = 0;
2707    int i;
2708    int is_zero = 1;
2709    for (i = 1; i <= 31; i++) {
2710        uint8_t digit = bcd_get_digit(a, i, invalid) -
2711                        bcd_get_digit(b, i, invalid) + carry;
2712        is_zero &= (digit == 0);
2713        if (digit & 0x80) {
2714            carry = -1;
2715            digit += 10;
2716        } else {
2717            carry = 0;
2718        }
2719
2720        bcd_put_digit(t, digit, i);
2721
2722        if (unlikely(*invalid)) {
2723            return -1;
2724        }
2725    }
2726
2727    *overflow = carry;
2728    return is_zero;
2729}
2730
2731uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2732{
2733
2734    int sgna = bcd_get_sgn(a);
2735    int sgnb = bcd_get_sgn(b);
2736    int invalid = (sgna == 0) || (sgnb == 0);
2737    int overflow = 0;
2738    int zero = 0;
2739    uint32_t cr = 0;
2740    ppc_avr_t result = { .u64 = { 0, 0 } };
2741
2742    if (!invalid) {
2743        if (sgna == sgnb) {
2744            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2745            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2746            cr = (sgna > 0) ? CRF_GT : CRF_LT;
2747        } else if (bcd_cmp_mag(a, b) > 0) {
2748            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2749            zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2750            cr = (sgna > 0) ? CRF_GT : CRF_LT;
2751        } else {
2752            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2753            zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2754            cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2755        }
2756    }
2757
2758    if (unlikely(invalid)) {
2759        result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2760        cr = CRF_SO;
2761    } else if (overflow) {
2762        cr |= CRF_SO;
2763    } else if (zero) {
2764        cr = CRF_EQ;
2765    }
2766
2767    *r = result;
2768
2769    return cr;
2770}
2771
2772uint32_t helper_bcdsub(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2773{
2774    ppc_avr_t bcopy = *b;
2775    int sgnb = bcd_get_sgn(b);
2776    if (sgnb < 0) {
2777        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2778    } else if (sgnb > 0) {
2779        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2780    }
2781    /* else invalid ... defer to bcdadd code for proper handling */
2782
2783    return helper_bcdadd(r, a, &bcopy, ps);
2784}
2785
2786uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2787{
2788    int i;
2789    int cr = 0;
2790    uint16_t national = 0;
2791    uint16_t sgnb = get_national_digit(b, 0);
2792    ppc_avr_t ret = { .u64 = { 0, 0 } };
2793    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2794
2795    for (i = 1; i < 8; i++) {
2796        national = get_national_digit(b, i);
2797        if (unlikely(national < 0x30 || national > 0x39)) {
2798            invalid = 1;
2799            break;
2800        }
2801
2802        bcd_put_digit(&ret, national & 0xf, i);
2803    }
2804
2805    if (sgnb == NATIONAL_PLUS) {
2806        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2807    } else {
2808        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2809    }
2810
2811    cr = bcd_cmp_zero(&ret);
2812
2813    if (unlikely(invalid)) {
2814        cr = CRF_SO;
2815    }
2816
2817    *r = ret;
2818
2819    return cr;
2820}
2821
2822uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2823{
2824    int i;
2825    int cr = 0;
2826    int sgnb = bcd_get_sgn(b);
2827    int invalid = (sgnb == 0);
2828    ppc_avr_t ret = { .u64 = { 0, 0 } };
2829
2830    int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0);
2831
2832    for (i = 1; i < 8; i++) {
2833        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2834
2835        if (unlikely(invalid)) {
2836            break;
2837        }
2838    }
2839    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2840
2841    cr = bcd_cmp_zero(b);
2842
2843    if (ox_flag) {
2844        cr |= CRF_SO;
2845    }
2846
2847    if (unlikely(invalid)) {
2848        cr = CRF_SO;
2849    }
2850
2851    *r = ret;
2852
2853    return cr;
2854}
2855
2856uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2857{
2858    int i;
2859    int cr = 0;
2860    int invalid = 0;
2861    int zone_digit = 0;
2862    int zone_lead = ps ? 0xF : 0x3;
2863    int digit = 0;
2864    ppc_avr_t ret = { .u64 = { 0, 0 } };
2865    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;
2866
2867    if (unlikely((sgnb < 0xA) && ps)) {
2868        invalid = 1;
2869    }
2870
2871    for (i = 0; i < 16; i++) {
2872        zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
2873        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
2874        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2875            invalid = 1;
2876            break;
2877        }
2878
2879        bcd_put_digit(&ret, digit, i + 1);
2880    }
2881
2882    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2883            (!ps && (sgnb & 0x4))) {
2884        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2885    } else {
2886        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2887    }
2888
2889    cr = bcd_cmp_zero(&ret);
2890
2891    if (unlikely(invalid)) {
2892        cr = CRF_SO;
2893    }
2894
2895    *r = ret;
2896
2897    return cr;
2898}
2899
2900uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2901{
2902    int i;
2903    int cr = 0;
2904    uint8_t digit = 0;
2905    int sgnb = bcd_get_sgn(b);
2906    int zone_lead = (ps) ? 0xF0 : 0x30;
2907    int invalid = (sgnb == 0);
2908    ppc_avr_t ret = { .u64 = { 0, 0 } };
2909
2910    int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);
2911
2912    for (i = 0; i < 16; i++) {
2913        digit = bcd_get_digit(b, i + 1, &invalid);
2914
2915        if (unlikely(invalid)) {
2916            break;
2917        }
2918
2919        ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
2920    }
2921
2922    if (ps) {
2923        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2924    } else {
2925        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2926    }
2927
2928    cr = bcd_cmp_zero(b);
2929
2930    if (ox_flag) {
2931        cr |= CRF_SO;
2932    }
2933
2934    if (unlikely(invalid)) {
2935        cr = CRF_SO;
2936    }
2937
2938    *r = ret;
2939
2940    return cr;
2941}
2942
2943uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2944{
2945    int i;
2946    int cr = 0;
2947    uint64_t lo_value;
2948    uint64_t hi_value;
2949    ppc_avr_t ret = { .u64 = { 0, 0 } };
2950
2951    if (b->s64[HI_IDX] < 0) {
2952        lo_value = -b->s64[LO_IDX];
2953        hi_value = ~b->u64[HI_IDX] + !lo_value;
2954        bcd_put_digit(&ret, 0xD, 0);
2955    } else {
2956        lo_value = b->u64[LO_IDX];
2957        hi_value = b->u64[HI_IDX];
2958        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2959    }
2960
2961    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
2962            lo_value > 9999999999999999ULL) {
2963        cr = CRF_SO;
2964    }
2965
2966    for (i = 1; i < 16; hi_value /= 10, i++) {
2967        bcd_put_digit(&ret, hi_value % 10, i);
2968    }
2969
2970    for (; i < 32; lo_value /= 10, i++) {
2971        bcd_put_digit(&ret, lo_value % 10, i);
2972    }
2973
2974    cr |= bcd_cmp_zero(&ret);
2975
2976    *r = ret;
2977
2978    return cr;
2979}
2980
2981uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2982{
2983    uint8_t i;
2984    int cr;
2985    uint64_t carry;
2986    uint64_t unused;
2987    uint64_t lo_value;
2988    uint64_t hi_value = 0;
2989    int sgnb = bcd_get_sgn(b);
2990    int invalid = (sgnb == 0);
2991
2992    lo_value = bcd_get_digit(b, 31, &invalid);
2993    for (i = 30; i > 0; i--) {
2994        mulu64(&lo_value, &carry, lo_value, 10ULL);
2995        mulu64(&hi_value, &unused, hi_value, 10ULL);
2996        lo_value += bcd_get_digit(b, i, &invalid);
2997        hi_value += carry;
2998
2999        if (unlikely(invalid)) {
3000            break;
3001        }
3002    }
3003
3004    if (sgnb == -1) {
3005        r->s64[LO_IDX] = -lo_value;
3006        r->s64[HI_IDX] = ~hi_value + !r->s64[LO_IDX];
3007    } else {
3008        r->s64[LO_IDX] = lo_value;
3009        r->s64[HI_IDX] = hi_value;
3010    }
3011
3012    cr = bcd_cmp_zero(b);
3013
3014    if (unlikely(invalid)) {
3015        cr = CRF_SO;
3016    }
3017
3018    return cr;
3019}
3020
3021uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3022{
3023    int i;
3024    int invalid = 0;
3025
3026    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
3027        return CRF_SO;
3028    }
3029
3030    *r = *a;
3031    bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0);
3032
3033    for (i = 1; i < 32; i++) {
3034        bcd_get_digit(a, i, &invalid);
3035        bcd_get_digit(b, i, &invalid);
3036        if (unlikely(invalid)) {
3037            return CRF_SO;
3038        }
3039    }
3040
3041    return bcd_cmp_zero(r);
3042}
3043
3044uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
3045{
3046    int sgnb = bcd_get_sgn(b);
3047
3048    *r = *b;
3049    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
3050
3051    if (bcd_is_valid(b) == false) {
3052        return CRF_SO;
3053    }
3054
3055    return bcd_cmp_zero(r);
3056}
3057
3058uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3059{
3060    int cr;
3061#if defined(HOST_WORDS_BIGENDIAN)
3062    int i = a->s8[7];
3063#else
3064    int i = a->s8[8];
3065#endif
3066    bool ox_flag = false;
3067    int sgnb = bcd_get_sgn(b);
3068    ppc_avr_t ret = *b;
3069    ret.u64[LO_IDX] &= ~0xf;
3070
3071    if (bcd_is_valid(b) == false) {
3072        return CRF_SO;
3073    }
3074
3075    if (unlikely(i > 31)) {
3076        i = 31;
3077    } else if (unlikely(i < -31)) {
3078        i = -31;
3079    }
3080
3081    if (i > 0) {
3082        ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
3083    } else {
3084        urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
3085    }
3086    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
3087
3088    *r = ret;
3089
3090    cr = bcd_cmp_zero(r);
3091    if (ox_flag) {
3092        cr |= CRF_SO;
3093    }
3094
3095    return cr;
3096}
3097
3098uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3099{
3100    int cr;
3101    int i;
3102    int invalid = 0;
3103    bool ox_flag = false;
3104    ppc_avr_t ret = *b;
3105
3106    for (i = 0; i < 32; i++) {
3107        bcd_get_digit(b, i, &invalid);
3108
3109        if (unlikely(invalid)) {
3110            return CRF_SO;
3111        }
3112    }
3113
3114#if defined(HOST_WORDS_BIGENDIAN)
3115    i = a->s8[7];
3116#else
3117    i = a->s8[8];
3118#endif
3119    if (i >= 32) {
3120        ox_flag = true;
3121        ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0;
3122    } else if (i <= -32) {
3123        ret.u64[LO_IDX] = ret.u64[HI_IDX] = 0;
3124    } else if (i > 0) {
3125        ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
3126    } else {
3127        urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
3128    }
3129    *r = ret;
3130
3131    cr = bcd_cmp_zero(r);
3132    if (ox_flag) {
3133        cr |= CRF_SO;
3134    }
3135
3136    return cr;
3137}
3138
3139uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3140{
3141    int cr;
3142    int unused = 0;
3143    int invalid = 0;
3144    bool ox_flag = false;
3145    int sgnb = bcd_get_sgn(b);
3146    ppc_avr_t ret = *b;
3147    ret.u64[LO_IDX] &= ~0xf;
3148
3149#if defined(HOST_WORDS_BIGENDIAN)
3150    int i = a->s8[7];
3151    ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } };
3152#else
3153    int i = a->s8[8];
3154    ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } };
3155#endif
3156
3157    if (bcd_is_valid(b) == false) {
3158        return CRF_SO;
3159    }
3160
3161    if (unlikely(i > 31)) {
3162        i = 31;
3163    } else if (unlikely(i < -31)) {
3164        i = -31;
3165    }
3166
3167    if (i > 0) {
3168        ulshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], i * 4, &ox_flag);
3169    } else {
3170        urshift(&ret.u64[LO_IDX], &ret.u64[HI_IDX], -i * 4);
3171
3172        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
3173            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
3174        }
3175    }
3176    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
3177
3178    cr = bcd_cmp_zero(&ret);
3179    if (ox_flag) {
3180        cr |= CRF_SO;
3181    }
3182    *r = ret;
3183
3184    return cr;
3185}
3186
3187uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3188{
3189    uint64_t mask;
3190    uint32_t ox_flag = 0;
3191#if defined(HOST_WORDS_BIGENDIAN)
3192    int i = a->s16[3] + 1;
3193#else
3194    int i = a->s16[4] + 1;
3195#endif
3196    ppc_avr_t ret = *b;
3197
3198    if (bcd_is_valid(b) == false) {
3199        return CRF_SO;
3200    }
3201
3202    if (i > 16 && i < 32) {
3203        mask = (uint64_t)-1 >> (128 - i * 4);
3204        if (ret.u64[HI_IDX] & ~mask) {
3205            ox_flag = CRF_SO;
3206        }
3207
3208        ret.u64[HI_IDX] &= mask;
3209    } else if (i >= 0 && i <= 16) {
3210        mask = (uint64_t)-1 >> (64 - i * 4);
3211        if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) {
3212            ox_flag = CRF_SO;
3213        }
3214
3215        ret.u64[LO_IDX] &= mask;
3216        ret.u64[HI_IDX] = 0;
3217    }
3218    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
3219    *r = ret;
3220
3221    return bcd_cmp_zero(&ret) | ox_flag;
3222}
3223
3224uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3225{
3226    int i;
3227    uint64_t mask;
3228    uint32_t ox_flag = 0;
3229    int invalid = 0;
3230    ppc_avr_t ret = *b;
3231
3232    for (i = 0; i < 32; i++) {
3233        bcd_get_digit(b, i, &invalid);
3234
3235        if (unlikely(invalid)) {
3236            return CRF_SO;
3237        }
3238    }
3239
3240#if defined(HOST_WORDS_BIGENDIAN)
3241    i = a->s16[3];
3242#else
3243    i = a->s16[4];
3244#endif
3245    if (i > 16 && i < 33) {
3246        mask = (uint64_t)-1 >> (128 - i * 4);
3247        if (ret.u64[HI_IDX] & ~mask) {
3248            ox_flag = CRF_SO;
3249        }
3250
3251        ret.u64[HI_IDX] &= mask;
3252    } else if (i > 0 && i <= 16) {
3253        mask = (uint64_t)-1 >> (64 - i * 4);
3254        if (ret.u64[HI_IDX] || (ret.u64[LO_IDX] & ~mask)) {
3255            ox_flag = CRF_SO;
3256        }
3257
3258        ret.u64[LO_IDX] &= mask;
3259        ret.u64[HI_IDX] = 0;
3260    } else if (i == 0) {
3261        if (ret.u64[HI_IDX] || ret.u64[LO_IDX]) {
3262            ox_flag = CRF_SO;
3263        }
3264        ret.u64[HI_IDX] = ret.u64[LO_IDX] = 0;
3265    }
3266
3267    *r = ret;
3268    if (r->u64[HI_IDX] == 0 && r->u64[LO_IDX] == 0) {
3269        return ox_flag | CRF_EQ;
3270    }
3271
3272    return ox_flag | CRF_GT;
3273}
3274
3275void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
3276{
3277    int i;
3278    VECTOR_FOR_INORDER_I(i, u8) {
3279        r->u8[i] = AES_sbox[a->u8[i]];
3280    }
3281}
3282
3283void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3284{
3285    ppc_avr_t result;
3286    int i;
3287
3288    VECTOR_FOR_INORDER_I(i, u32) {
3289        result.AVRW(i) = b->AVRW(i) ^
3290            (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
3291             AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
3292             AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
3293             AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
3294    }
3295    *r = result;
3296}
3297
3298void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3299{
3300    ppc_avr_t result;
3301    int i;
3302
3303    VECTOR_FOR_INORDER_I(i, u8) {
3304        result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
3305    }
3306    *r = result;
3307}
3308
3309void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3310{
3311    /* This differs from what is written in ISA V2.07.  The RTL is */
3312    /* incorrect and will be fixed in V2.07B.                      */
3313    int i;
3314    ppc_avr_t tmp;
3315
3316    VECTOR_FOR_INORDER_I(i, u8) {
3317        tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
3318    }
3319
3320    VECTOR_FOR_INORDER_I(i, u32) {
3321        r->AVRW(i) =
3322            AES_imc[tmp.AVRB(4*i + 0)][0] ^
3323            AES_imc[tmp.AVRB(4*i + 1)][1] ^
3324            AES_imc[tmp.AVRB(4*i + 2)][2] ^
3325            AES_imc[tmp.AVRB(4*i + 3)][3];
3326    }
3327}
3328
3329void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3330{
3331    ppc_avr_t result;
3332    int i;
3333
3334    VECTOR_FOR_INORDER_I(i, u8) {
3335        result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
3336    }
3337    *r = result;
3338}
3339
3340#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32-n)))
3341#if defined(HOST_WORDS_BIGENDIAN)
3342#define EL_IDX(i) (i)
3343#else
3344#define EL_IDX(i) (3 - (i))
3345#endif
3346
3347void helper_vshasigmaw(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
3348{
3349    int st = (st_six & 0x10) != 0;
3350    int six = st_six & 0xF;
3351    int i;
3352
3353    VECTOR_FOR_INORDER_I(i, u32) {
3354        if (st == 0) {
3355            if ((six & (0x8 >> i)) == 0) {
3356                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
3357                                    ROTRu32(a->u32[EL_IDX(i)], 18) ^
3358                                    (a->u32[EL_IDX(i)] >> 3);
3359            } else { /* six.bit[i] == 1 */
3360                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
3361                                    ROTRu32(a->u32[EL_IDX(i)], 19) ^
3362                                    (a->u32[EL_IDX(i)] >> 10);
3363            }
3364        } else { /* st == 1 */
3365            if ((six & (0x8 >> i)) == 0) {
3366                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
3367                                    ROTRu32(a->u32[EL_IDX(i)], 13) ^
3368                                    ROTRu32(a->u32[EL_IDX(i)], 22);
3369            } else { /* six.bit[i] == 1 */
3370                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
3371                                    ROTRu32(a->u32[EL_IDX(i)], 11) ^
3372                                    ROTRu32(a->u32[EL_IDX(i)], 25);
3373            }
3374        }
3375    }
3376}
3377
3378#undef ROTRu32
3379#undef EL_IDX
3380
3381#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64-n)))
3382#if defined(HOST_WORDS_BIGENDIAN)
3383#define EL_IDX(i) (i)
3384#else
3385#define EL_IDX(i) (1 - (i))
3386#endif
3387
3388void helper_vshasigmad(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
3389{
3390    int st = (st_six & 0x10) != 0;
3391    int six = st_six & 0xF;
3392    int i;
3393
3394    VECTOR_FOR_INORDER_I(i, u64) {
3395        if (st == 0) {
3396            if ((six & (0x8 >> (2*i))) == 0) {
3397                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
3398                                    ROTRu64(a->u64[EL_IDX(i)], 8) ^
3399                                    (a->u64[EL_IDX(i)] >> 7);
3400            } else { /* six.bit[2*i] == 1 */
3401                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
3402                                    ROTRu64(a->u64[EL_IDX(i)], 61) ^
3403                                    (a->u64[EL_IDX(i)] >> 6);
3404            }
3405        } else { /* st == 1 */
3406            if ((six & (0x8 >> (2*i))) == 0) {
3407                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
3408                                    ROTRu64(a->u64[EL_IDX(i)], 34) ^
3409                                    ROTRu64(a->u64[EL_IDX(i)], 39);
3410            } else { /* six.bit[2*i] == 1 */
3411                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
3412                                    ROTRu64(a->u64[EL_IDX(i)], 18) ^
3413                                    ROTRu64(a->u64[EL_IDX(i)], 41);
3414            }
3415        }
3416    }
3417}
3418
3419#undef ROTRu64
3420#undef EL_IDX
3421
3422void helper_vpermxor(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3423{
3424    ppc_avr_t result;
3425    int i;
3426
3427    VECTOR_FOR_INORDER_I(i, u8) {
3428        int indexA = c->u8[i] >> 4;
3429        int indexB = c->u8[i] & 0xF;
3430#if defined(HOST_WORDS_BIGENDIAN)
3431        result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
3432#else
3433        result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
3434#endif
3435    }
3436    *r = result;
3437}
3438
3439#undef VECTOR_FOR_INORDER_I
3440#undef HI_IDX
3441#undef LO_IDX
3442
3443/*****************************************************************************/
3444/* SPE extension helpers */
3445/* Use a table to make this quicker */
3446static const uint8_t hbrev[16] = {
3447    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3448    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3449};
3450
3451static inline uint8_t byte_reverse(uint8_t val)
3452{
3453    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3454}
3455
3456static inline uint32_t word_reverse(uint32_t val)
3457{
3458    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3459        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3460}
3461
3462#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
3463target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3464{
3465    uint32_t a, b, d, mask;
3466
3467    mask = UINT32_MAX >> (32 - MASKBITS);
3468    a = arg1 & mask;
3469    b = arg2 & mask;
3470    d = word_reverse(1 + word_reverse(a | ~b));
3471    return (arg1 & ~mask) | (d & b);
3472}
3473
3474uint32_t helper_cntlsw32(uint32_t val)
3475{
3476    if (val & 0x80000000) {
3477        return clz32(~val);
3478    } else {
3479        return clz32(val);
3480    }
3481}
3482
3483uint32_t helper_cntlzw32(uint32_t val)
3484{
3485    return clz32(val);
3486}
3487
3488/* 440 specific */
3489target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3490                          target_ulong low, uint32_t update_Rc)
3491{
3492    target_ulong mask;
3493    int i;
3494
3495    i = 1;
3496    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3497        if ((high & mask) == 0) {
3498            if (update_Rc) {
3499                env->crf[0] = 0x4;
3500            }
3501            goto done;
3502        }
3503        i++;
3504    }
3505    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3506        if ((low & mask) == 0) {
3507            if (update_Rc) {
3508                env->crf[0] = 0x8;
3509            }
3510            goto done;
3511        }
3512        i++;
3513    }
3514    i = 8;
3515    if (update_Rc) {
3516        env->crf[0] = 0x2;
3517    }
3518 done:
3519    env->xer = (env->xer & ~0x7F) | i;
3520    if (update_Rc) {
3521        env->crf[0] |= xer_so;
3522    }
3523    return i;
3524}
3525