qemu/target/ppc/int_helper.c
   1/*
   2 *  PowerPC integer and vector emulation helpers for QEMU.
   3 *
   4 *  Copyright (c) 2003-2007 Jocelyn Mayer
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "cpu.h"
  22#include "internal.h"
  23#include "qemu/host-utils.h"
  24#include "qemu/main-loop.h"
  25#include "qemu/log.h"
  26#include "exec/helper-proto.h"
  27#include "crypto/aes.h"
  28#include "crypto/aes-round.h"
  29#include "fpu/softfloat.h"
  30#include "qapi/error.h"
  31#include "qemu/guest-random.h"
  32#include "tcg/tcg-gvec-desc.h"
  33
  34#include "helper_regs.h"
  35/*****************************************************************************/
  36/* Fixed point operations helpers */
  37
  38static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
  39{
  40    if (unlikely(ov)) {
  41        env->so = env->ov = env->ov32 = 1;
  42    } else {
  43        env->ov = env->ov32 = 0;
  44    }
  45}
  46
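/*
 * divweu/divwe: Divide Word Extended (Unsigned).  The 64-bit dividend is
 * (RA || 32 zero bits); the divisor is the word in RB.  The result is
 * undefined (0 is returned here) when the divisor is zero or the quotient
 * does not fit in 32 bits; with OE set, OV/OV32/SO are updated.
 */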
  47target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
  48                           uint32_t oe)
  49{
  50    uint64_t rt = 0;
  51    int overflow = 0;
  52
  53    uint64_t dividend = (uint64_t)ra << 32;
  54    uint64_t divisor = (uint32_t)rb;
  55
  56    if (unlikely(divisor == 0)) {
  57        overflow = 1;
  58    } else {
  59        rt = dividend / divisor;
  60        overflow = rt > UINT32_MAX;
  61    }
  62
  63    if (unlikely(overflow)) {
  64        rt = 0; /* Undefined */
  65    }
  66
  67    if (oe) {
  68        helper_update_ov_legacy(env, overflow);
  69    }
  70
  71    return (target_ulong)rt;
  72}
  73
  74target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
  75                          uint32_t oe)
  76{
  77    int64_t rt = 0;
  78    int overflow = 0;
  79
  80    int64_t dividend = (int64_t)ra << 32;
  81    int64_t divisor = (int64_t)((int32_t)rb);
  82
  83    if (unlikely((divisor == 0) ||
  84                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
  85        overflow = 1;
  86    } else {
  87        rt = dividend / divisor;
  88        overflow = rt != (int32_t)rt;
  89    }
  90
  91    if (unlikely(overflow)) {
  92        rt = 0; /* Undefined */
  93    }
  94
  95    if (oe) {
  96        helper_update_ov_legacy(env, overflow);
  97    }
  98
  99    return (target_ulong)rt;
 100}
 101
 102#if defined(TARGET_PPC64)
 103
 104uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
 105{
 106    uint64_t rt = 0;
 107    int overflow = 0;
 108
 109    if (unlikely(rb == 0 || ra >= rb)) {
 110        overflow = 1;
 111        rt = 0; /* Undefined */
 112    } else {
 113        divu128(&rt, &ra, rb);
 114    }
 115
 116    if (oe) {
 117        helper_update_ov_legacy(env, overflow);
 118    }
 119
 120    return rt;
 121}
 122
 123uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
 124{
 125    uint64_t rt = 0;
 126    int64_t ra = (int64_t)rau;
 127    int64_t rb = (int64_t)rbu;
 128    int overflow = 0;
 129
 130    if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
 131        overflow = 1;
 132        rt = 0; /* Undefined */
 133    } else {
 134        divs128(&rt, &ra, rb);
 135    }
 136
 137    if (oe) {
 138        helper_update_ov_legacy(env, overflow);
 139    }
 140
 141    return rt;
 142}
 143
 144#endif
 145
 146
 147#if defined(TARGET_PPC64)
 148/* if x = 0xab, returns 0xabababababababab */
 149#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))
 150
 151/*
 152 * Subtract 1 from each byte, AND with the inverse, and check whether the
 153 * MSB is set in each byte.
 154 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 155 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 156 */
 157#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))
 158
 159/* When you XOR the pattern and there is a match, that byte will be zero */
 160#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))
 161
 162uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
 163{
 164    return hasvalue(rb, ra) ? CRF_GT : 0;
 165}
 166
 167#undef pattern
 168#undef haszero
 169#undef hasvalue
 170
 171/*
 172 * Return a random number.
 173 */
 174uint64_t helper_darn32(void)
 175{
 176    Error *err = NULL;
 177    uint32_t ret;
 178
 179    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
 180        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
 181                      error_get_pretty(err));
 182        error_free(err);
 183        return -1;
 184    }
 185
 186    return ret;
 187}
 188
 189uint64_t helper_darn64(void)
 190{
 191    Error *err = NULL;
 192    uint64_t ret;
 193
 194    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
 195        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
 196                      error_get_pretty(err));
 197        error_free(err);
 198        return -1;
 199    }
 200
 201    return ret;
 202}
 203
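/*
 * bpermd: Bit Permute Doubleword.  Each of the eight bytes of RS (byte 0
 * being the least significant) selects one bit of RB, numbered from the
 * most-significant bit as PPC_BIT() does; bit i of the result is that bit,
 * or 0 when the selector is >= 64.
 */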
 204uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
 205{
 206    int i;
 207    uint64_t ra = 0;
 208
 209    for (i = 0; i < 8; i++) {
 210        int index = (rs >> (i * 8)) & 0xFF;
 211        if (index < 64) {
 212            if (rb & PPC_BIT(index)) {
 213                ra |= 1 << i;
 214            }
 215        }
 216    }
 217    return ra;
 218}
 219
 220#endif
 221
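/*
 * cmpb: Compare Bytes.  Each byte of the result is set to 0xff where the
 * corresponding bytes of RS and RB are equal, and to 0x00 where they differ.
 */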
 222target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
 223{
 224    target_ulong mask = 0xff;
 225    target_ulong ra = 0;
 226    int i;
 227
 228    for (i = 0; i < sizeof(target_ulong); i++) {
 229        if ((rs & mask) == (rb & mask)) {
 230            ra |= mask;
 231        }
 232        mask <<= 8;
 233    }
 234    return ra;
 235}
 236
 237/* shift right arithmetic helper */
 238target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
 239                         target_ulong shift)
 240{
 241    int32_t ret;
 242
 243    if (likely(!(shift & 0x20))) {
 244        if (likely((uint32_t)shift != 0)) {
 245            shift &= 0x1f;
 246            ret = (int32_t)value >> shift;
 247            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
 248                env->ca32 = env->ca = 0;
 249            } else {
 250                env->ca32 = env->ca = 1;
 251            }
 252        } else {
 253            ret = (int32_t)value;
 254            env->ca32 = env->ca = 0;
 255        }
 256    } else {
 257        ret = (int32_t)value >> 31;
 258        env->ca32 = env->ca = (ret != 0);
 259    }
 260    return (target_long)ret;
 261}
 262
 263#if defined(TARGET_PPC64)
 264target_ulong helper_srad(CPUPPCState *env, target_ulong value,
 265                         target_ulong shift)
 266{
 267    int64_t ret;
 268
 269    if (likely(!(shift & 0x40))) {
 270        if (likely((uint64_t)shift != 0)) {
 271            shift &= 0x3f;
 272            ret = (int64_t)value >> shift;
 273            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
 274                env->ca32 = env->ca = 0;
 275            } else {
 276                env->ca32 = env->ca = 1;
 277            }
 278        } else {
 279            ret = (int64_t)value;
 280            env->ca32 = env->ca = 0;
 281        }
 282    } else {
 283        ret = (int64_t)value >> 63;
 284        env->ca32 = env->ca = (ret != 0);
 285    }
 286    return ret;
 287}
 288#endif
 289
 290#if defined(TARGET_PPC64)
 291target_ulong helper_popcntb(target_ulong val)
 292{
 293    /* Note that we don't fold past bytes */
 294    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
 295                                           0x5555555555555555ULL);
 296    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
 297                                           0x3333333333333333ULL);
 298    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
 299                                           0x0f0f0f0f0f0f0f0fULL);
 300    return val;
 301}
 302
 303target_ulong helper_popcntw(target_ulong val)
 304{
 305    /* Note that we don't fold past words.  */
 306    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
 307                                           0x5555555555555555ULL);
 308    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
 309                                           0x3333333333333333ULL);
 310    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
 311                                           0x0f0f0f0f0f0f0f0fULL);
 312    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
 313                                           0x00ff00ff00ff00ffULL);
 314    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
 315                                           0x0000ffff0000ffffULL);
 316    return val;
 317}
 318#else
 319target_ulong helper_popcntb(target_ulong val)
 320{
 321    /* Note that we don't fold past bytes */
 322    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
 323    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
 324    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
 325    return val;
 326}
 327#endif
 328
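/*
 * CFUGED: Centrifuge Doubleword.  The bits of src selected by ones in the
 * mask are gathered, in order, into the low-order bits of the result; the
 * bits selected by zeros are gathered into the high-order bits.
 */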
 329uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
 330{
 331    /*
 332     * Instead of processing the mask bit-by-bit from the most significant to
 333     * the least significant bit, as described in PowerISA, we'll handle it in
 334     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
 335     * ctz or cto, we negate the mask at the end of the loop.
 336     */
 337    target_ulong m, left = 0, right = 0;
 338    unsigned int n, i = 64;
 339    bool bit = false; /* tracks if we are processing zeros or ones */
 340
 341    if (mask == 0 || mask == -1) {
 342        return src;
 343    }
 344
 345    /* Processes the mask in blocks, from LSB to MSB */
 346    while (i) {
 347        /* Find how many bits we should take */
 348        n = ctz64(mask);
 349        if (n > i) {
 350            n = i;
 351        }
 352
 353        /*
 354         * Extracts 'n' trailing bits of src and puts them on the leading 'n'
 355         * bits of 'right' or 'left', pushing down the previously extracted
 356         * values.
 357         */
 358        m = (1ll << n) - 1;
 359        if (bit) {
 360            right = ror64(right | (src & m), n);
 361        } else {
 362            left = ror64(left | (src & m), n);
 363        }
 364
 365        /*
 366         * Discards the processed bits from 'src' and 'mask'. Note that we are
 367         * removing 'n' trailing zeros from 'mask', but the logical shift will
 368         * add 'n' leading zeros back, so the population count of 'mask' is kept
 369         * the same.
 370         */
 371        src >>= n;
 372        mask >>= n;
 373        i -= n;
 374        bit = !bit;
 375        mask = ~mask;
 376    }
 377
 378    /*
 379     * At the end, 'right' has been ror'ed ctpop(mask) times. To put it back
 380     * in place, we shift it right by the remaining 64 - ctpop(mask) bits.
 381     */
 382    if (bit) {
 383        n = ctpop64(mask);
 384    } else {
 385        n = 64 - ctpop64(mask);
 386    }
 387
 388    return left | (right >> n);
 389}
 390
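/*
 * PDEPD: Parallel Bits Deposit Doubleword.  The low-order bits of src are
 * scattered, in order, into the positions where the mask has a one; all
 * other result bits are zero.
 */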
 391uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
 392{
 393    int i, o;
 394    uint64_t result = 0;
 395
 396    if (mask == -1) {
 397        return src;
 398    }
 399
 400    for (i = 0; mask != 0; i++) {
 401        o = ctz64(mask);
 402        mask &= mask - 1;
 403        result |= ((src >> i) & 1) << o;
 404    }
 405
 406    return result;
 407}
 408
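/*
 * PEXTD: Parallel Bits Extract Doubleword.  The bits of src at the
 * positions where the mask has a one are gathered, in order, into the
 * low-order bits of the result.
 */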
 409uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
 410{
 411    int i, o;
 412    uint64_t result = 0;
 413
 414    if (mask == -1) {
 415        return src;
 416    }
 417
 418    for (o = 0; mask != 0; o++) {
 419        i = ctz64(mask);
 420        mask &= mask - 1;
 421        result |= ((src >> i) & 1) << o;
 422    }
 423
 424    return result;
 425}
 426
 427/*****************************************************************************/
 428/* Altivec extension helpers */
 429#if HOST_BIG_ENDIAN
 430#define VECTOR_FOR_INORDER_I(index, element)                    \
 431    for (index = 0; index < ARRAY_SIZE(r->element); index++)
 432#else
 433#define VECTOR_FOR_INORDER_I(index, element)                    \
 434    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
 435#endif
 436
 437/* Saturating arithmetic helpers.  */
 438#define SATCVT(from, to, from_type, to_type, min, max)          \
 439    static inline to_type cvt##from##to(from_type x, int *sat)  \
 440    {                                                           \
 441        to_type r;                                              \
 442                                                                \
 443        if (x < (from_type)min) {                               \
 444            r = min;                                            \
 445            *sat = 1;                                           \
 446        } else if (x > (from_type)max) {                        \
 447            r = max;                                            \
 448            *sat = 1;                                           \
 449        } else {                                                \
 450            r = x;                                              \
 451        }                                                       \
 452        return r;                                               \
 453    }
 454#define SATCVTU(from, to, from_type, to_type, min, max)         \
 455    static inline to_type cvt##from##to(from_type x, int *sat)  \
 456    {                                                           \
 457        to_type r;                                              \
 458                                                                \
 459        if (x > (from_type)max) {                               \
 460            r = max;                                            \
 461            *sat = 1;                                           \
 462        } else {                                                \
 463            r = x;                                              \
 464        }                                                       \
 465        return r;                                               \
 466    }
 467SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
 468SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
 469SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)
 470
 471SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
 472SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
 473SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
 474SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
 475SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
 476SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
 477#undef SATCVT
 478#undef SATCVTU
 479
 480void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
 481{
 482    ppc_store_vscr(env, vscr);
 483}
 484
 485uint32_t helper_mfvscr(CPUPPCState *env)
 486{
 487    return ppc_get_vscr(env);
 488}
 489
 490static inline void set_vscr_sat(CPUPPCState *env)
 491{
 492    /* The choice of non-zero value is arbitrary.  */
 493    env->vscr_sat.u32[0] = 1;
 494}
 495
 496/* vprtybq */
 497void helper_VPRTYBQ(ppc_avr_t *r, ppc_avr_t *b, uint32_t v)
 498{
 499    uint64_t res = b->u64[0] ^ b->u64[1];
 500    res ^= res >> 32;
 501    res ^= res >> 16;
 502    res ^= res >> 8;
 503    r->VsrD(1) = res & 1;
 504    r->VsrD(0) = 0;
 505}
 506
 507#define VARITHFP(suffix, func)                                          \
 508    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
 509                          ppc_avr_t *b)                                 \
 510    {                                                                   \
 511        int i;                                                          \
 512                                                                        \
 513        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
 514            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
 515        }                                                               \
 516    }
 517VARITHFP(addfp, float32_add)
 518VARITHFP(subfp, float32_sub)
 519VARITHFP(minfp, float32_min)
 520VARITHFP(maxfp, float32_max)
 521#undef VARITHFP
 522
 523#define VARITHFPFMA(suffix, type)                                       \
 524    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
 525                           ppc_avr_t *b, ppc_avr_t *c)                  \
 526    {                                                                   \
 527        int i;                                                          \
 528        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
 529            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
 530                                       type, &env->vec_status);         \
 531        }                                                               \
 532    }
 533VARITHFPFMA(maddfp, 0);
 534VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
 535#undef VARITHFPFMA
 536
 537#define VARITHSAT_CASE(type, op, cvt, element)                          \
 538    {                                                                   \
 539        type result = (type)a->element[i] op (type)b->element[i];       \
 540        r->element[i] = cvt(result, &sat);                              \
 541    }
 542
 543#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
 544    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
 545                        ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
 546    {                                                                   \
 547        int sat = 0;                                                    \
 548        int i;                                                          \
 549                                                                        \
 550        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
 551            VARITHSAT_CASE(optype, op, cvt, element);                   \
 552        }                                                               \
 553        if (sat) {                                                      \
 554            vscr_sat->u32[0] = 1;                                       \
 555        }                                                               \
 556    }
 557#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
 558    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
 559    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
 560#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
 561    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
 562    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
 563VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
 564VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
 565VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
 566VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
 567VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
 568VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
 569#undef VARITHSAT_CASE
 570#undef VARITHSAT_DO
 571#undef VARITHSAT_SIGNED
 572#undef VARITHSAT_UNSIGNED
 573
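/*
 * VAVG - Vector Average.  Each element is (a + b + 1) >> 1, evaluated in a
 * wider intermediate type so the carry out of the addition is not lost.
 */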
 574#define VAVG(name, element, etype)                                          \
 575    void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
 576    {                                                                       \
 577        int i;                                                              \
 578                                                                            \
 579        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
 580            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;      \
 581            r->element[i] = x >> 1;                                         \
 582        }                                                                   \
 583    }
 584
 585VAVG(VAVGSB, s8, int16_t)
 586VAVG(VAVGUB, u8, uint16_t)
 587VAVG(VAVGSH, s16, int32_t)
 588VAVG(VAVGUH, u16, uint32_t)
 589VAVG(VAVGSW, s32, int64_t)
 590VAVG(VAVGUW, u32, uint64_t)
 591#undef VAVG
 592
 593#define VABSDU(name, element)                                           \
 594void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
 595{                                                                       \
 596    int i;                                                              \
 597                                                                        \
 598    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
 599        r->element[i] = (a->element[i] > b->element[i]) ?               \
 600            (a->element[i] - b->element[i]) :                           \
 601            (b->element[i] - a->element[i]);                            \
 602    }                                                                   \
 603}
 604
 605/*
 606 * VABSDU - Vector absolute difference unsigned
 607 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 608 *   element - element type to access from vector
 609 */
 610VABSDU(VABSDUB, u8)
 611VABSDU(VABSDUH, u16)
 612VABSDU(VABSDUW, u32)
 613#undef VABSDU
 614
 615#define VCF(suffix, cvt, element)                                       \
 616    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
 617                            ppc_avr_t *b, uint32_t uim)                 \
 618    {                                                                   \
 619        int i;                                                          \
 620                                                                        \
 621        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
 622            float32 t = cvt(b->element[i], &env->vec_status);           \
 623            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
 624        }                                                               \
 625    }
 626VCF(ux, uint32_to_float32, u32)
 627VCF(sx, int32_to_float32, s32)
 628#undef VCF
 629
 630#define VCMPNEZ(NAME, ELEM) \
 631void helper_##NAME(ppc_vsr_t *t, ppc_vsr_t *a, ppc_vsr_t *b, uint32_t desc) \
 632{                                                                           \
 633    for (int i = 0; i < ARRAY_SIZE(t->ELEM); i++) {                         \
 634        t->ELEM[i] = ((a->ELEM[i] == 0) || (b->ELEM[i] == 0) ||             \
 635                      (a->ELEM[i] != b->ELEM[i])) ? -1 : 0;                 \
 636    }                                                                       \
 637}
 638VCMPNEZ(VCMPNEZB, u8)
 639VCMPNEZ(VCMPNEZH, u16)
 640VCMPNEZ(VCMPNEZW, u32)
 641#undef VCMPNEZ
 642
 643#define VCMPFP_DO(suffix, compare, order, record)                       \
 644    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
 645                             ppc_avr_t *a, ppc_avr_t *b)                \
 646    {                                                                   \
 647        uint32_t ones = (uint32_t)-1;                                   \
 648        uint32_t all = ones;                                            \
 649        uint32_t none = 0;                                              \
 650        int i;                                                          \
 651                                                                        \
 652        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
 653            uint32_t result;                                            \
 654            FloatRelation rel =                                         \
 655                float32_compare_quiet(a->f32[i], b->f32[i],             \
 656                                      &env->vec_status);                \
 657            if (rel == float_relation_unordered) {                      \
 658                result = 0;                                             \
 659            } else if (rel compare order) {                             \
 660                result = ones;                                          \
 661            } else {                                                    \
 662                result = 0;                                             \
 663            }                                                           \
 664            r->u32[i] = result;                                         \
 665            all &= result;                                              \
 666            none |= result;                                             \
 667        }                                                               \
 668        if (record) {                                                   \
 669            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
 670        }                                                               \
 671    }
 672#define VCMPFP(suffix, compare, order)          \
 673    VCMPFP_DO(suffix, compare, order, 0)        \
 674    VCMPFP_DO(suffix##_dot, compare, order, 1)
 675VCMPFP(eqfp, ==, float_relation_equal)
 676VCMPFP(gefp, !=, float_relation_less)
 677VCMPFP(gtfp, ==, float_relation_greater)
 678#undef VCMPFP_DO
 679#undef VCMPFP
 680
 681static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
 682                                    ppc_avr_t *a, ppc_avr_t *b, int record)
 683{
 684    int i;
 685    int all_in = 0;
 686
 687    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
 688        FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
 689                                                     &env->vec_status);
 690        if (le_rel == float_relation_unordered) {
 691            r->u32[i] = 0xc0000000;
 692            all_in = 1;
 693        } else {
 694            float32 bneg = float32_chs(b->f32[i]);
 695            FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
 696                                                         &env->vec_status);
 697            int le = le_rel != float_relation_greater;
 698            int ge = ge_rel != float_relation_less;
 699
 700            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
 701            all_in |= (!le | !ge);
 702        }
 703    }
 704    if (record) {
 705        env->crf[6] = (all_in == 0) << 1;
 706    }
 707}
 708
 709void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
 710{
 711    vcmpbfp_internal(env, r, a, b, 0);
 712}
 713
 714void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 715                        ppc_avr_t *b)
 716{
 717    vcmpbfp_internal(env, r, a, b, 1);
 718}
 719
 720#define VCT(suffix, satcvt, element)                                    \
 721    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
 722                            ppc_avr_t *b, uint32_t uim)                 \
 723    {                                                                   \
 724        int i;                                                          \
 725        int sat = 0;                                                    \
 726        float_status s = env->vec_status;                               \
 727                                                                        \
 728        set_float_rounding_mode(float_round_to_zero, &s);               \
 729        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
 730            if (float32_is_any_nan(b->f32[i])) {                        \
 731                r->element[i] = 0;                                      \
 732            } else {                                                    \
 733                float64 t = float32_to_float64(b->f32[i], &s);          \
 734                int64_t j;                                              \
 735                                                                        \
 736                t = float64_scalbn(t, uim, &s);                         \
 737                j = float64_to_int64(t, &s);                            \
 738                r->element[i] = satcvt(j, &sat);                        \
 739            }                                                           \
 740        }                                                               \
 741        if (sat) {                                                      \
 742            set_vscr_sat(env);                                          \
 743        }                                                               \
 744    }
 745VCT(uxs, cvtsduw, u32)
 746VCT(sxs, cvtsdsw, s32)
 747#undef VCT
 748
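/*
 * Helpers for the VSX MMA integer GER (rank-k update) instructions.  Each
 * ger_rank* routine forms the masked sum of products of the sub-elements of
 * one word of VSR[a] and VSR[b]; xviger() accumulates the 4x4 grid of sums
 * into the accumulator, applying the XMSK/YMSK element masks and, for the
 * saturating forms, clamping to INT32_MIN/INT32_MAX.
 */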
 749typedef int64_t do_ger(uint32_t, uint32_t, uint32_t);
 750
 751static int64_t ger_rank8(uint32_t a, uint32_t b, uint32_t mask)
 752{
 753    int64_t psum = 0;
 754    for (int i = 0; i < 8; i++, mask >>= 1) {
 755        if (mask & 1) {
 756            psum += (int64_t)sextract32(a, 4 * i, 4) * sextract32(b, 4 * i, 4);
 757        }
 758    }
 759    return psum;
 760}
 761
 762static int64_t ger_rank4(uint32_t a, uint32_t b, uint32_t mask)
 763{
 764    int64_t psum = 0;
 765    for (int i = 0; i < 4; i++, mask >>= 1) {
 766        if (mask & 1) {
 767            psum += sextract32(a, 8 * i, 8) * (int64_t)extract32(b, 8 * i, 8);
 768        }
 769    }
 770    return psum;
 771}
 772
 773static int64_t ger_rank2(uint32_t a, uint32_t b, uint32_t mask)
 774{
 775    int64_t psum = 0;
 776    for (int i = 0; i < 2; i++, mask >>= 1) {
 777        if (mask & 1) {
 778            psum += (int64_t)sextract32(a, 16 * i, 16) *
 779                             sextract32(b, 16 * i, 16);
 780        }
 781    }
 782    return psum;
 783}
 784
 785static void xviger(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b, ppc_acc_t  *at,
 786                   uint32_t mask, bool sat, bool acc, do_ger ger)
 787{
 788    uint8_t pmsk = FIELD_EX32(mask, GER_MSK, PMSK),
 789            xmsk = FIELD_EX32(mask, GER_MSK, XMSK),
 790            ymsk = FIELD_EX32(mask, GER_MSK, YMSK);
 791    uint8_t xmsk_bit, ymsk_bit;
 792    int64_t psum;
 793    int i, j;
 794    for (i = 0, xmsk_bit = 1 << 3; i < 4; i++, xmsk_bit >>= 1) {
 795        for (j = 0, ymsk_bit = 1 << 3; j < 4; j++, ymsk_bit >>= 1) {
 796            if ((xmsk_bit & xmsk) && (ymsk_bit & ymsk)) {
 797                psum = ger(a->VsrW(i), b->VsrW(j), pmsk);
 798                if (acc) {
 799                    psum += at[i].VsrSW(j);
 800                }
 801                if (sat && psum > INT32_MAX) {
 802                    set_vscr_sat(env);
 803                    at[i].VsrSW(j) = INT32_MAX;
 804                } else if (sat && psum < INT32_MIN) {
 805                    set_vscr_sat(env);
 806                    at[i].VsrSW(j) = INT32_MIN;
 807                } else {
 808                    at[i].VsrSW(j) = (int32_t) psum;
 809                }
 810            } else {
 811                at[i].VsrSW(j) = 0;
 812            }
 813        }
 814    }
 815}
 816
 817QEMU_FLATTEN
 818void helper_XVI4GER8(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
 819                     ppc_acc_t *at, uint32_t mask)
 820{
 821    xviger(env, a, b, at, mask, false, false, ger_rank8);
 822}
 823
 824QEMU_FLATTEN
 825void helper_XVI4GER8PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
 826                       ppc_acc_t *at, uint32_t mask)
 827{
 828    xviger(env, a, b, at, mask, false, true, ger_rank8);
 829}
 830
 831QEMU_FLATTEN
 832void helper_XVI8GER4(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
 833                     ppc_acc_t *at, uint32_t mask)
 834{
 835    xviger(env, a, b, at, mask, false, false, ger_rank4);
 836}
 837
 838QEMU_FLATTEN
 839void helper_XVI8GER4PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
 840                       ppc_acc_t *at, uint32_t mask)
 841{
 842    xviger(env, a, b, at, mask, false, true, ger_rank4);
 843}
 844
 845QEMU_FLATTEN
 846void helper_XVI8GER4SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
 847                        ppc_acc_t *at, uint32_t mask)
 848{
 849    xviger(env, a, b, at, mask, true, true, ger_rank4);
 850}
 851
 852QEMU_FLATTEN
 853void helper_XVI16GER2(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
 854                      ppc_acc_t *at, uint32_t mask)
 855{
 856    xviger(env, a, b, at, mask, false, false, ger_rank2);
 857}
 858
 859QEMU_FLATTEN
 860void helper_XVI16GER2S(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
 861                       ppc_acc_t *at, uint32_t mask)
 862{
 863    xviger(env, a, b, at, mask, true, false, ger_rank2);
 864}
 865
 866QEMU_FLATTEN
 867void helper_XVI16GER2PP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
 868                        ppc_acc_t *at, uint32_t mask)
 869{
 870    xviger(env, a, b, at, mask, false, true, ger_rank2);
 871}
 872
 873QEMU_FLATTEN
 874void helper_XVI16GER2SPP(CPUPPCState *env, ppc_vsr_t *a, ppc_vsr_t *b,
 875                         ppc_acc_t *at, uint32_t mask)
 876{
 877    xviger(env, a, b, at, mask, true, true, ger_rank2);
 878}
 879
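/*
 * vclzlsbb/vctzlsbb: count the leading (from the most-significant byte) or
 * trailing bytes whose least-significant bit is zero, stopping at the first
 * byte with bit 0 set.
 */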
 880target_ulong helper_vclzlsbb(ppc_avr_t *r)
 881{
 882    target_ulong count = 0;
 883    int i;
 884    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
 885        if (r->VsrB(i) & 0x01) {
 886            break;
 887        }
 888        count++;
 889    }
 890    return count;
 891}
 892
 893target_ulong helper_vctzlsbb(ppc_avr_t *r)
 894{
 895    target_ulong count = 0;
 896    int i;
 897    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
 898        if (r->VsrB(i) & 0x01) {
 899            break;
 900        }
 901        count++;
 902    }
 903    return count;
 904}
 905
 906void helper_VMHADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 907                      ppc_avr_t *b, ppc_avr_t *c)
 908{
 909    int sat = 0;
 910    int i;
 911
 912    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
 913        int32_t prod = a->s16[i] * b->s16[i];
 914        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
 915
 916        r->s16[i] = cvtswsh(t, &sat);
 917    }
 918
 919    if (sat) {
 920        set_vscr_sat(env);
 921    }
 922}
 923
 924void helper_VMHRADDSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
 925                       ppc_avr_t *b, ppc_avr_t *c)
 926{
 927    int sat = 0;
 928    int i;
 929
 930    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
 931        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
 932        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
 933        r->s16[i] = cvtswsh(t, &sat);
 934    }
 935
 936    if (sat) {
 937        set_vscr_sat(env);
 938    }
 939}
 940
 941void helper_VMLADDUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
 942                      uint32_t v)
 943{
 944    int i;
 945
 946    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
 947        int32_t prod = a->s16[i] * b->s16[i];
 948        r->s16[i] = (int16_t) (prod + c->s16[i]);
 949    }
 950}
 951
 952#define VMRG_DO(name, element, access, ofs)                                  \
 953    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
 954    {                                                                        \
 955        ppc_avr_t result;                                                    \
 956        int i, half = ARRAY_SIZE(r->element) / 2;                            \
 957                                                                             \
 958        for (i = 0; i < half; i++) {                                         \
 959            result.access(i * 2 + 0) = a->access(i + ofs);                   \
 960            result.access(i * 2 + 1) = b->access(i + ofs);                   \
 961        }                                                                    \
 962        *r = result;                                                         \
 963    }
 964
 965#define VMRG(suffix, element, access)          \
 966    VMRG_DO(mrgl##suffix, element, access, half)   \
 967    VMRG_DO(mrgh##suffix, element, access, 0)
 968VMRG(b, u8, VsrB)
 969VMRG(h, u16, VsrH)
 970VMRG(w, u32, VsrW)
 971#undef VMRG_DO
 972#undef VMRG
 973
 974void helper_VMSUMMBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
 975{
 976    int32_t prod[16];
 977    int i;
 978
 979    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
 980        prod[i] = (int32_t)a->s8[i] * b->u8[i];
 981    }
 982
 983    VECTOR_FOR_INORDER_I(i, s32) {
 984        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
 985            prod[4 * i + 2] + prod[4 * i + 3];
 986    }
 987}
 988
 989void helper_VMSUMSHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
 990{
 991    int32_t prod[8];
 992    int i;
 993
 994    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
 995        prod[i] = a->s16[i] * b->s16[i];
 996    }
 997
 998    VECTOR_FOR_INORDER_I(i, s32) {
 999        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1000    }
1001}
1002
1003void helper_VMSUMSHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1004                     ppc_avr_t *b, ppc_avr_t *c)
1005{
1006    int32_t prod[8];
1007    int i;
1008    int sat = 0;
1009
1010    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
1011        prod[i] = (int32_t)a->s16[i] * b->s16[i];
1012    }
1013
1014    VECTOR_FOR_INORDER_I(i, s32) {
1015        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];
1016
1017        r->u32[i] = cvtsdsw(t, &sat);
1018    }
1019
1020    if (sat) {
1021        set_vscr_sat(env);
1022    }
1023}
1024
1025void helper_VMSUMUBM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1026{
1027    uint16_t prod[16];
1028    int i;
1029
1030    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1031        prod[i] = a->u8[i] * b->u8[i];
1032    }
1033
1034    VECTOR_FOR_INORDER_I(i, u32) {
1035        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
1036            prod[4 * i + 2] + prod[4 * i + 3];
1037    }
1038}
1039
1040void helper_VMSUMUHM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1041{
1042    uint32_t prod[8];
1043    int i;
1044
1045    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1046        prod[i] = a->u16[i] * b->u16[i];
1047    }
1048
1049    VECTOR_FOR_INORDER_I(i, u32) {
1050        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1051    }
1052}
1053
1054void helper_VMSUMUHS(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
1055                     ppc_avr_t *b, ppc_avr_t *c)
1056{
1057    uint32_t prod[8];
1058    int i;
1059    int sat = 0;
1060
1061    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
1062        prod[i] = a->u16[i] * b->u16[i];
1063    }
1064
1065    VECTOR_FOR_INORDER_I(i, s32) {
1066        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];
1067
1068        r->u32[i] = cvtuduw(t, &sat);
1069    }
1070
1071    if (sat) {
1072        set_vscr_sat(env);
1073    }
1074}
1075
1076#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
1077    void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
1078    {                                                                   \
1079        int i;                                                          \
1080                                                                        \
1081        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
1082            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
1083                                     (cast)b->mul_access(i);            \
1084        }                                                               \
1085    }
1086
1087#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
1088    void helper_V##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
1089    {                                                                   \
1090        int i;                                                          \
1091                                                                        \
1092        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
1093            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
1094                                     (cast)b->mul_access(i + 1);        \
1095        }                                                               \
1096    }
1097
1098#define VMUL(suffix, mul_element, mul_access, prod_access, cast)       \
1099    VMUL_DO_EVN(MULE##suffix, mul_element, mul_access, prod_access, cast)  \
1100    VMUL_DO_ODD(MULO##suffix, mul_element, mul_access, prod_access, cast)
1101VMUL(SB, s8, VsrSB, VsrSH, int16_t)
1102VMUL(SH, s16, VsrSH, VsrSW, int32_t)
1103VMUL(SW, s32, VsrSW, VsrSD, int64_t)
1104VMUL(UB, u8, VsrB, VsrH, uint16_t)
1105VMUL(UH, u16, VsrH, VsrW, uint32_t)
1106VMUL(UW, u32, VsrW, VsrD, uint64_t)
1107#undef VMUL_DO_EVN
1108#undef VMUL_DO_ODD
1109#undef VMUL
1110
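/*
 * XXPERMX: each byte of the permute control vector pcv whose upper three
 * bits match uim selects one byte of the 32-byte concatenation s0:s1 via
 * its low five bits; result bytes whose control byte does not match uim
 * are set to zero.
 */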
1111void helper_XXPERMX(ppc_vsr_t *t, ppc_vsr_t *s0, ppc_vsr_t *s1, ppc_vsr_t *pcv,
1112                    target_ulong uim)
1113{
1114    int i, idx;
1115    ppc_vsr_t tmp = { .u64 = {0, 0} };
1116
1117    for (i = 0; i < ARRAY_SIZE(t->u8); i++) {
1118        if ((pcv->VsrB(i) >> 5) == uim) {
1119            idx = pcv->VsrB(i) & 0x1f;
1120            if (idx < ARRAY_SIZE(t->u8)) {
1121                tmp.VsrB(i) = s0->VsrB(idx);
1122            } else {
1123                tmp.VsrB(i) = s1->VsrB(idx - ARRAY_SIZE(t->u8));
1124            }
1125        }
1126    }
1127
1128    *t = tmp;
1129}
1130
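/*
 * 128-bit and extended divide/modulo helpers.  Division by zero, and the
 * most negative dividend divided by -1, are undefined by the architecture;
 * the helpers below return the dividend (divide forms) or zero (modulo
 * forms) in those cases.
 */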
1131void helper_VDIVSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1132{
1133    Int128 neg1 = int128_makes64(-1);
1134    Int128 int128_min = int128_make128(0, INT64_MIN);
1135    if (likely(int128_nz(b->s128) &&
1136              (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
1137        t->s128 = int128_divs(a->s128, b->s128);
1138    } else {
1139        t->s128 = a->s128; /* Undefined behavior */
1140    }
1141}
1142
1143void helper_VDIVUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1144{
1145    if (int128_nz(b->s128)) {
1146        t->s128 = int128_divu(a->s128, b->s128);
1147    } else {
1148        t->s128 = a->s128; /* Undefined behavior */
1149    }
1150}
1151
1152void helper_VDIVESD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1153{
1154    int i;
1155    int64_t high;
1156    uint64_t low;
1157    for (i = 0; i < 2; i++) {
1158        high = a->s64[i];
1159        low = 0;
1160        if (unlikely((high == INT64_MIN && b->s64[i] == -1) || !b->s64[i])) {
1161            t->s64[i] = a->s64[i]; /* Undefined behavior */
1162        } else {
1163            divs128(&low, &high, b->s64[i]);
1164            t->s64[i] = low;
1165        }
1166    }
1167}
1168
1169void helper_VDIVEUD(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1170{
1171    int i;
1172    uint64_t high, low;
1173    for (i = 0; i < 2; i++) {
1174        high = a->u64[i];
1175        low = 0;
1176        if (unlikely(!b->u64[i])) {
1177            t->u64[i] = a->u64[i]; /* Undefined behavior */
1178        } else {
1179            divu128(&low, &high, b->u64[i]);
1180            t->u64[i] = low;
1181        }
1182    }
1183}
1184
1185void helper_VDIVESQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1186{
1187    Int128 high, low;
1188    Int128 int128_min = int128_make128(0, INT64_MIN);
1189    Int128 neg1 = int128_makes64(-1);
1190
1191    high = a->s128;
1192    low = int128_zero();
1193    if (unlikely(!int128_nz(b->s128) ||
1194                 (int128_eq(b->s128, neg1) && int128_eq(high, int128_min)))) {
1195        t->s128 = a->s128; /* Undefined behavior */
1196    } else {
1197        divs256(&low, &high, b->s128);
1198        t->s128 = low;
1199    }
1200}
1201
1202void helper_VDIVEUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1203{
1204    Int128 high, low;
1205
1206    high = a->s128;
1207    low = int128_zero();
1208    if (unlikely(!int128_nz(b->s128))) {
1209        t->s128 = a->s128; /* Undefined behavior */
1210    } else {
1211        divu256(&low, &high, b->s128);
1212        t->s128 = low;
1213    }
1214}
1215
1216void helper_VMODSQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1217{
1218    Int128 neg1 = int128_makes64(-1);
1219    Int128 int128_min = int128_make128(0, INT64_MIN);
1220    if (likely(int128_nz(b->s128) &&
1221              (int128_ne(a->s128, int128_min) || int128_ne(b->s128, neg1)))) {
1222        t->s128 = int128_rems(a->s128, b->s128);
1223    } else {
1224        t->s128 = int128_zero(); /* Undefined behavior */
1225    }
1226}
1227
1228void helper_VMODUQ(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b)
1229{
1230    if (likely(int128_nz(b->s128))) {
1231        t->s128 = int128_remu(a->s128, b->s128);
1232    } else {
1233        t->s128 = int128_zero(); /* Undefined behavior */
1234    }
1235}
1236
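/*
 * vperm: each byte of the result is selected from the 32-byte concatenation
 * of a (bytes 0-15) and b (bytes 16-31) by the low five bits of the
 * corresponding byte of c; helper_VPERMR below is the reversed-index form.
 */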
1237void helper_VPERM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1238{
1239    ppc_avr_t result;
1240    int i;
1241
1242    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1243        int s = c->VsrB(i) & 0x1f;
1244        int index = s & 0xf;
1245
1246        if (s & 0x10) {
1247            result.VsrB(i) = b->VsrB(index);
1248        } else {
1249            result.VsrB(i) = a->VsrB(index);
1250        }
1251    }
1252    *r = result;
1253}
1254
1255void helper_VPERMR(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1256{
1257    ppc_avr_t result;
1258    int i;
1259
1260    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1261        int s = c->VsrB(i) & 0x1f;
1262        int index = 15 - (s & 0xf);
1263
1264        if (s & 0x10) {
1265            result.VsrB(i) = a->VsrB(index);
1266        } else {
1267            result.VsrB(i) = b->VsrB(index);
1268        }
1269    }
1270    *r = result;
1271}
1272
1273#define XXGENPCV_BE_EXP(NAME, SZ) \
1274void glue(helper_, glue(NAME, _be_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1275{                                                                   \
1276    ppc_vsr_t tmp;                                                  \
1277                                                                    \
1278    /* Initialize tmp with the result of an all-zeros mask */       \
1279    tmp.VsrD(0) = 0x1011121314151617;                               \
1280    tmp.VsrD(1) = 0x18191A1B1C1D1E1F;                               \
1281                                                                    \
1282    /* Iterate over the most significant byte of each element */    \
1283    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
1284        if (b->VsrB(i) & 0x80) {                                    \
1285            /* Update each byte of the element */                   \
1286            for (int k = 0; k < SZ; k++) {                          \
1287                tmp.VsrB(i + k) = j + k;                            \
1288            }                                                       \
1289            j += SZ;                                                \
1290        }                                                           \
1291    }                                                               \
1292                                                                    \
1293    *t = tmp;                                                       \
1294}
1295
1296#define XXGENPCV_BE_COMP(NAME, SZ) \
1297void glue(helper_, glue(NAME, _be_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1298{                                                                   \
1299    ppc_vsr_t tmp = { .u64 = { 0, 0 } };                            \
1300                                                                    \
1301    /* Iterate over the most significant byte of each element */    \
1302    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
1303        if (b->VsrB(i) & 0x80) {                                    \
1304            /* Update each byte of the element */                   \
1305            for (int k = 0; k < SZ; k++) {                          \
1306                tmp.VsrB(j + k) = i + k;                            \
1307            }                                                       \
1308            j += SZ;                                                \
1309        }                                                           \
1310    }                                                               \
1311                                                                    \
1312    *t = tmp;                                                       \
1313}
1314
1315#define XXGENPCV_LE_EXP(NAME, SZ) \
1316void glue(helper_, glue(NAME, _le_exp))(ppc_vsr_t *t, ppc_vsr_t *b) \
1317{                                                                   \
1318    ppc_vsr_t tmp;                                                  \
1319                                                                    \
1320    /* Initialize tmp with the result of an all-zeros mask */       \
1321    tmp.VsrD(0) = 0x1F1E1D1C1B1A1918;                               \
1322    tmp.VsrD(1) = 0x1716151413121110;                               \
1323                                                                    \
1324    /* Iterate over the most significant byte of each element */    \
1325    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
1326        /* Reverse indexing of "i" */                               \
1327        const int idx = ARRAY_SIZE(b->u8) - i - SZ;                 \
1328        if (b->VsrB(idx) & 0x80) {                                  \
1329            /* Update each byte of the element */                   \
1330            for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) {       \
1331                tmp.VsrB(idx + rk) = j + k;                         \
1332            }                                                       \
1333            j += SZ;                                                \
1334        }                                                           \
1335    }                                                               \
1336                                                                    \
1337    *t = tmp;                                                       \
1338}
1339
1340#define XXGENPCV_LE_COMP(NAME, SZ) \
1341void glue(helper_, glue(NAME, _le_comp))(ppc_vsr_t *t, ppc_vsr_t *b)\
1342{                                                                   \
1343    ppc_vsr_t tmp = { .u64 = { 0, 0 } };                            \
1344                                                                    \
1345    /* Iterate over the most significant byte of each element */    \
1346    for (int i = 0, j = 0; i < ARRAY_SIZE(b->u8); i += SZ) {        \
1347        if (b->VsrB(ARRAY_SIZE(b->u8) - i - SZ) & 0x80) {           \
1348            /* Update each byte of the element */                   \
1349            for (int k = 0, rk = SZ - 1; k < SZ; k++, rk--) {       \
1350                /* Reverse indexing of "j" */                       \
1351                const int idx = ARRAY_SIZE(b->u8) - j - SZ;         \
1352                tmp.VsrB(idx + rk) = i + k;                         \
1353            }                                                       \
1354            j += SZ;                                                \
1355        }                                                           \
1356    }                                                               \
1357                                                                    \
1358    *t = tmp;                                                       \
1359}
1360
1361#define XXGENPCV(NAME, SZ) \
1362    XXGENPCV_BE_EXP(NAME, SZ)  \
1363    XXGENPCV_BE_COMP(NAME, SZ) \
1364    XXGENPCV_LE_EXP(NAME, SZ)  \
1365    XXGENPCV_LE_COMP(NAME, SZ) \
1366
1367XXGENPCV(XXGENPCVBM, 1)
1368XXGENPCV(XXGENPCVHM, 2)
1369XXGENPCV(XXGENPCVWM, 4)
1370XXGENPCV(XXGENPCVDM, 8)
1371
1372#undef XXGENPCV_BE_EXP
1373#undef XXGENPCV_BE_COMP
1374#undef XXGENPCV_LE_EXP
1375#undef XXGENPCV_LE_COMP
1376#undef XXGENPCV
1377
1378#if HOST_BIG_ENDIAN
1379#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
1380#define VBPERMD_INDEX(i) (i)
1381#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
1382#else
1383#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
1384#define VBPERMD_INDEX(i) (1 - i)
1385#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
1386#endif
1387#define EXTRACT_BIT(avr, i, index) \
1388        (extract64((avr)->VsrD(i), 63 - index, 1))
1389
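/*
 * vbpermd/vbpermq: Vector Bit Permute.  Each byte of b is a bit index into
 * a; vbpermq gathers up to 16 bits (indices 0-127) into the high doubleword
 * of the result, vbpermd gathers 8 bits per doubleword (indices 0-63).
 */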
1390void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1391{
1392    int i, j;
1393    ppc_avr_t result = { .u64 = { 0, 0 } };
1394    VECTOR_FOR_INORDER_I(i, u64) {
1395        for (j = 0; j < 8; j++) {
1396            int index = VBPERMQ_INDEX(b, (i * 8) + j);
1397            if (index < 64 && EXTRACT_BIT(a, i, index)) {
1398                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1399            }
1400        }
1401    }
1402    *r = result;
1403}
1404
1405void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1406{
1407    int i;
1408    uint64_t perm = 0;
1409
1410    VECTOR_FOR_INORDER_I(i, u8) {
1411        int index = VBPERMQ_INDEX(b, i);
1412
1413        if (index < 128) {
1414            uint64_t mask = (1ull << (63 - (index & 0x3F)));
1415            if (a->u64[VBPERMQ_DW(index)] & mask) {
1416                perm |= (0x8000 >> i);
1417            }
1418        }
1419    }
1420
1421    r->VsrD(0) = perm;
1422    r->VsrD(1) = 0;
1423}
1424
1425#undef VBPERMQ_INDEX
1426#undef VBPERMQ_DW
1427
1428#define PMSUM(name, srcfld, trgfld, trgtyp)                   \
1429void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
1430{                                                             \
1431    int i, j;                                                 \
1432    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];    \
1433                                                              \
1434    VECTOR_FOR_INORDER_I(i, srcfld) {                         \
1435        prod[i] = 0;                                          \
1436        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
1437            if (a->srcfld[i] & (1ull << j)) {                 \
1438                prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
1439            }                                                 \
1440        }                                                     \
1441    }                                                         \
1442                                                              \
1443    VECTOR_FOR_INORDER_I(i, trgfld) {                         \
1444        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];         \
1445    }                                                         \
1446}
1447
1448PMSUM(vpmsumb, u8, u16, uint16_t)
1449PMSUM(vpmsumh, u16, u32, uint32_t)
1450PMSUM(vpmsumw, u32, u64, uint64_t)
1451
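/*
 * vpmsumd: Vector Polynomial Multiply-Sum Doubleword.  Carry-less (GF(2))
 * multiplication of each doubleword of a by the corresponding doubleword of
 * b, with the two 128-bit partial products XORed together.
 */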
1452void helper_VPMSUMD(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1453{
1454    int i, j;
1455    Int128 tmp, prod[2] = {int128_zero(), int128_zero()};
1456
1457    for (j = 0; j < 64; j++) {
1458        for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1459            if (a->VsrD(i) & (1ull << j)) {
1460                tmp = int128_make64(b->VsrD(i));
1461                tmp = int128_lshift(tmp, j);
1462                prod[i] = int128_xor(prod[i], tmp);
1463            }
1464        }
1465    }
1466
1467    r->s128 = int128_xor(prod[0], prod[1]);
1468}
1469
1470#if HOST_BIG_ENDIAN
1471#define PKBIG 1
1472#else
1473#define PKBIG 0
1474#endif
1475void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1476{
1477    int i, j;
1478    ppc_avr_t result;
1479#if HOST_BIG_ENDIAN
1480    const ppc_avr_t *x[2] = { a, b };
1481#else
1482    const ppc_avr_t *x[2] = { b, a };
1483#endif
1484
1485    VECTOR_FOR_INORDER_I(i, u64) {
1486        VECTOR_FOR_INORDER_I(j, u32) {
1487            uint32_t e = x[i]->u32[j];
1488
1489            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1490                                     ((e >> 6) & 0x3e0) |
1491                                     ((e >> 3) & 0x1f));
1492        }
1493    }
1494    *r = result;
1495}
1496
1497#define VPK(suffix, from, to, cvt, dosat)                               \
1498    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
1499                            ppc_avr_t *a, ppc_avr_t *b)                 \
1500    {                                                                   \
1501        int i;                                                          \
1502        int sat = 0;                                                    \
1503        ppc_avr_t result;                                               \
1504        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
1505        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
1506                                                                        \
1507        VECTOR_FOR_INORDER_I(i, from) {                                 \
1508            result.to[i] = cvt(a0->from[i], &sat);                      \
1509            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1510        }                                                               \
1511        *r = result;                                                    \
1512        if (dosat && sat) {                                             \
1513            set_vscr_sat(env);                                          \
1514        }                                                               \
1515    }
1516#define I(x, y) (x)
1517VPK(shss, s16, s8, cvtshsb, 1)
1518VPK(shus, s16, u8, cvtshub, 1)
1519VPK(swss, s32, s16, cvtswsh, 1)
1520VPK(swus, s32, u16, cvtswuh, 1)
1521VPK(sdss, s64, s32, cvtsdsw, 1)
1522VPK(sdus, s64, u32, cvtsduw, 1)
1523VPK(uhus, u16, u8, cvtuhub, 1)
1524VPK(uwus, u32, u16, cvtuwuh, 1)
1525VPK(udus, u64, u32, cvtuduw, 1)
1526VPK(uhum, u16, u8, I, 0)
1527VPK(uwum, u32, u16, I, 0)
1528VPK(udum, u64, u32, I, 0)
1529#undef I
1530#undef VPK
1531#undef PKBIG
1532
1533void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1534{
1535    int i;
1536
1537    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1538        r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1539    }
1540}
1541
1542#define VRFI(suffix, rounding)                                  \
1543    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
1544                             ppc_avr_t *b)                      \
1545    {                                                           \
1546        int i;                                                  \
1547        float_status s = env->vec_status;                       \
1548                                                                \
1549        set_float_rounding_mode(rounding, &s);                  \
1550        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {              \
1551            r->f32[i] = float32_round_to_int(b->f32[i], &s);    \
1552        }                                                       \
1553    }
1554VRFI(n, float_round_nearest_even)
1555VRFI(m, float_round_down)
1556VRFI(p, float_round_up)
1557VRFI(z, float_round_to_zero)
1558#undef VRFI
1559
1560void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1561{
1562    int i;
1563
1564    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1565        float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1566
1567        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1568    }
1569}
1570
1571#define VRLMI(name, size, element, insert)                                  \
1572void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t desc) \
1573{                                                                           \
1574    int i;                                                                  \
1575    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                          \
1576        uint##size##_t src1 = a->element[i];                                \
1577        uint##size##_t src2 = b->element[i];                                \
1578        uint##size##_t src3 = r->element[i];                                \
1579        uint##size##_t begin, end, shift, mask, rot_val;                    \
1580                                                                            \
1581        shift = extract##size(src2, 0, 6);                                  \
1582        end   = extract##size(src2, 8, 6);                                  \
1583        begin = extract##size(src2, 16, 6);                                 \
1584        rot_val = rol##size(src1, shift);                                   \
1585        mask = mask_u##size(begin, end);                                    \
1586        if (insert) {                                                       \
1587            r->element[i] = (rot_val & mask) | (src3 & ~mask);              \
1588        } else {                                                            \
1589            r->element[i] = (rot_val & mask);                               \
1590        }                                                                   \
1591    }                                                                       \
1592}
1593
1594VRLMI(VRLDMI, 64, u64, 1);
1595VRLMI(VRLWMI, 32, u32, 1);
1596VRLMI(VRLDNM, 64, u64, 0);
1597VRLMI(VRLWNM, 32, u32, 0);
1598
1599void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1600{
1601    int i;
1602
1603    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1604        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1605    }
1606}
1607
1608void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1609{
1610    int i;
1611
1612    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1613        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1614    }
1615}
1616
1617#define VEXTU_X_DO(name, size, left)                            \
1618target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
1619{                                                               \
1620    int index = (a & 0xf) * 8;                                  \
1621    if (left) {                                                 \
1622        index = 128 - index - size;                             \
1623    }                                                           \
1624    return int128_getlo(int128_rshift(b->s128, index)) &        \
1625        MAKE_64BIT_MASK(0, size);                               \
1626}
1627VEXTU_X_DO(vextublx,  8, 1)
1628VEXTU_X_DO(vextuhlx, 16, 1)
1629VEXTU_X_DO(vextuwlx, 32, 1)
1630VEXTU_X_DO(vextubrx,  8, 0)
1631VEXTU_X_DO(vextuhrx, 16, 0)
1632VEXTU_X_DO(vextuwrx, 32, 0)
1633#undef VEXTU_X_DO
1634
1635void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1636{
1637    int i;
1638    unsigned int shift, bytes, size;
1639
1640    size = ARRAY_SIZE(r->u8);
1641    for (i = 0; i < size; i++) {
1642        shift = b->VsrB(i) & 0x7;             /* extract shift value */
1643        bytes = (a->VsrB(i) << 8) +           /* extract adjacent bytes */
1644            (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1645        r->VsrB(i) = (bytes << shift) >> 8;   /* shift and store result */
1646    }
1647}
1648
1649void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1650{
1651    int i;
1652    unsigned int shift, bytes;
1653
1654    /*
1655     * Process the bytes in reverse order, since the destination and source
1656     * registers may be the same: the register is modified in place (saving
1657     * a temporary), and reverse order ensures no computed byte is fed back.
1658     */
1659    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1660        shift = b->VsrB(i) & 0x7;               /* extract shift value */
1661        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1662                                                /* extract adjacent bytes */
1663        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
1664    }
1665}
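
/*
 * Worked example for the adjacent-byte window used by helper_vslv and
 * helper_vsrv above: with a->VsrB(i - 1) = 0x12, a->VsrB(i) = 0x34 and a
 * shift count of 4, helper_vsrv forms the 16-bit window 0x1234, shifts it
 * right to 0x0123 and keeps the low byte, so r->VsrB(i) = 0x23.
 */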
1666
1667void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1668{
1669    int sh = shift & 0xf;
1670    int i;
1671    ppc_avr_t result;
1672
1673    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1674        int index = sh + i;
1675        if (index > 0xf) {
1676            result.VsrB(i) = b->VsrB(index - 0x10);
1677        } else {
1678            result.VsrB(i) = a->VsrB(index);
1679        }
1680    }
1681    *r = result;
1682}
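
/*
 * For example, with shift = 3 the result is a->VsrB(3..15) followed by
 * b->VsrB(0..2): the two source registers are treated as a 32-byte string
 * and a 16-byte window is taken starting sh bytes into the first operand.
 */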
1683
1684void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1685{
1686    int sh = (b->VsrB(0xf) >> 3) & 0xf;
1687
1688#if HOST_BIG_ENDIAN
1689    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1690    memset(&r->u8[16 - sh], 0, sh);
1691#else
1692    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1693    memset(&r->u8[0], 0, sh);
1694#endif
1695}
1696
1697#if HOST_BIG_ENDIAN
1698#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1699#else
1700#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1701#endif
1702
1703#define VINSX(SUFFIX, TYPE) \
1704void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t,       \
1705                                         uint64_t val, target_ulong index)     \
1706{                                                                              \
1707    const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE);                       \
1708    target_long idx = index;                                                   \
1709                                                                               \
1710    if (idx < 0 || idx > maxidx) {                                             \
1711        idx =  idx < 0 ? sizeof(TYPE) - idx : idx;                             \
1712        qemu_log_mask(LOG_GUEST_ERROR,                                         \
1713            "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx   \
1714            ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx);         \
1715    } else {                                                                   \
1716        TYPE src = val;                                                        \
1717        memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE));           \
1718    }                                                                          \
1719}
1720VINSX(B, uint8_t)
1721VINSX(H, uint16_t)
1722VINSX(W, uint32_t)
1723VINSX(D, uint64_t)
1724#undef ELEM_ADDR
1725#undef VINSX
1726#if HOST_BIG_ENDIAN
1727#define VEXTDVLX(NAME, SIZE) \
1728void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1729                   target_ulong index)                                         \
1730{                                                                              \
1731    const target_long idx = index;                                             \
1732    ppc_avr_t tmp[2] = { *a, *b };                                             \
1733    memset(t, 0, sizeof(*t));                                                  \
1734    if (idx >= 0 && idx + SIZE <= sizeof(tmp)) {                               \
1735        memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1736    } else {                                                                   \
1737        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x"  \
1738                      TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n",         \
1739                      env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE);        \
1740    }                                                                          \
1741}
1742#else
1743#define VEXTDVLX(NAME, SIZE) \
1744void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1745                   target_ulong index)                                         \
1746{                                                                              \
1747    const target_long idx = index;                                             \
1748    ppc_avr_t tmp[2] = { *b, *a };                                             \
1749    memset(t, 0, sizeof(*t));                                                  \
1750    if (idx >= 0 && idx + SIZE <= sizeof(tmp)) {                               \
1751        memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2],                                  \
1752               (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE);                  \
1753    } else {                                                                   \
1754        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x"  \
1755                      TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n",         \
1756                      env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE);        \
1757    }                                                                          \
1758}
1759#endif
1760VEXTDVLX(VEXTDUBVLX, 1)
1761VEXTDVLX(VEXTDUHVLX, 2)
1762VEXTDVLX(VEXTDUWVLX, 4)
1763VEXTDVLX(VEXTDDVLX, 8)
1764#undef VEXTDVLX
1765#if HOST_BIG_ENDIAN
1766#define VEXTRACT(suffix, element)                                            \
1767    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1768    {                                                                        \
1769        uint32_t es = sizeof(r->element[0]);                                 \
1770        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
1771        memset(&r->u8[8], 0, 8);                                             \
1772        memset(&r->u8[0], 0, 8 - es);                                        \
1773    }
1774#else
1775#define VEXTRACT(suffix, element)                                            \
1776    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1777    {                                                                        \
1778        uint32_t es = sizeof(r->element[0]);                                 \
1779        uint32_t s = (16 - index) - es;                                      \
1780        memmove(&r->u8[8], &b->u8[s], es);                                   \
1781        memset(&r->u8[0], 0, 8);                                             \
1782        memset(&r->u8[8 + es], 0, 8 - es);                                   \
1783    }
1784#endif
1785VEXTRACT(ub, u8)
1786VEXTRACT(uh, u16)
1787VEXTRACT(uw, u32)
1788VEXTRACT(d, u64)
1789#undef VEXTRACT
1790
1791#define VSTRI(NAME, ELEM, NUM_ELEMS, LEFT) \
1792uint32_t helper_##NAME(ppc_avr_t *t, ppc_avr_t *b) \
1793{                                                   \
1794    int i, idx, crf = 0;                            \
1795                                                    \
1796    for (i = 0; i < NUM_ELEMS; i++) {               \
1797        idx = LEFT ? i : NUM_ELEMS - i - 1;         \
1798        if (b->Vsr##ELEM(idx)) {                    \
1799            t->Vsr##ELEM(idx) = b->Vsr##ELEM(idx);  \
1800        } else {                                    \
1801            crf = 0b0010;                           \
1802            break;                                  \
1803        }                                           \
1804    }                                               \
1805                                                    \
1806    for (; i < NUM_ELEMS; i++) {                    \
1807        idx = LEFT ? i : NUM_ELEMS - i - 1;         \
1808        t->Vsr##ELEM(idx) = 0;                      \
1809    }                                               \
1810                                                    \
1811    return crf;                                     \
1812}
1813VSTRI(VSTRIBL, B, 16, true)
1814VSTRI(VSTRIBR, B, 16, false)
1815VSTRI(VSTRIHL, H, 8, true)
1816VSTRI(VSTRIHR, H, 8, false)
1817#undef VSTRI
1818
1819void helper_XXEXTRACTUW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
1820{
1821    ppc_vsr_t t = { };
1822    size_t es = sizeof(uint32_t);
1823    uint32_t ext_index;
1824    int i;
1825
1826    ext_index = index;
1827    for (i = 0; i < es; i++, ext_index++) {
1828        t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1829    }
1830
1831    *xt = t;
1832}
1833
1834void helper_XXINSERTW(ppc_vsr_t *xt, ppc_vsr_t *xb, uint32_t index)
1835{
1836    ppc_vsr_t t = *xt;
1837    size_t es = sizeof(uint32_t);
1838    int ins_index, i = 0;
1839
1840    ins_index = index;
1841    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1842        t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1843    }
1844
1845    *xt = t;
1846}
1847
1848void helper_XXEVAL(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c,
1849                   uint32_t desc)
1850{
1851    /*
1852     * Instead of processing imm bit-by-bit, we'll skip the computation of
1853     * conjunctions whose corresponding bit is unset.
1854     */
1855    int bit, imm = simd_data(desc);
1856    Int128 conj, disj = int128_zero();
1857
1858    /* Iterate over set bits from the least to the most significant bit */
1859    while (imm) {
1860        /*
1861         * Get the next bit to be processed with ctzl. Invert the result of
1862         * ctzl to match the bit indexing used by PowerISA.
1863         */
1864        bit = 7 - ctzl(imm);
1865        if (bit & 0x4) {
1866            conj = a->s128;
1867        } else {
1868            conj = int128_not(a->s128);
1869        }
1870        if (bit & 0x2) {
1871            conj = int128_and(conj, b->s128);
1872        } else {
1873            conj = int128_and(conj, int128_not(b->s128));
1874        }
1875        if (bit & 0x1) {
1876            conj = int128_and(conj, c->s128);
1877        } else {
1878            conj = int128_and(conj, int128_not(c->s128));
1879        }
1880        disj = int128_or(disj, conj);
1881
1882        /* Unset the least significant bit that is set */
1883        imm &= imm - 1;
1884    }
1885
1886    t->s128 = disj;
1887}
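
/*
 * imm is effectively the truth table of an arbitrary three-input boolean
 * function: the conjunction for input pattern k (with a as the most
 * significant of the three bits) is enabled by bit (7 - k) of imm, counting
 * from the least significant bit.  For example, imm = 0x01 computes
 * a & b & c, imm = 0x80 computes ~a & ~b & ~c, and imm = 0x69 computes
 * a ^ b ^ c, all bitwise across the 128-bit operands.
 */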
1888
1889#define XXBLEND(name, sz) \
1890void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b,  \
1891                                 ppc_avr_t *c, uint32_t desc)               \
1892{                                                                           \
1893    for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) {                  \
1894        t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ?               \
1895            b->glue(u, sz)[i] : a->glue(u, sz)[i];                          \
1896    }                                                                       \
1897}
1898XXBLEND(B, 8)
1899XXBLEND(H, 16)
1900XXBLEND(W, 32)
1901XXBLEND(D, 64)
1902#undef XXBLEND
1903
1904void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1905{
1906    int sh = (b->VsrB(0xf) >> 3) & 0xf;
1907
1908#if HOST_BIG_ENDIAN
1909    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1910    memset(&r->u8[0], 0, sh);
1911#else
1912    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1913    memset(&r->u8[16 - sh], 0, sh);
1914#endif
1915}
1916
1917void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1918{
1919    int64_t t;
1920    int i, upper;
1921    ppc_avr_t result;
1922    int sat = 0;
1923
1924    upper = ARRAY_SIZE(r->s32) - 1;
1925    t = (int64_t)b->VsrSW(upper);
1926    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1927        t += a->VsrSW(i);
1928        result.VsrSW(i) = 0;
1929    }
1930    result.VsrSW(upper) = cvtsdsw(t, &sat);
1931    *r = result;
1932
1933    if (sat) {
1934        set_vscr_sat(env);
1935    }
1936}
1937
1938void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1939{
1940    int i, j, upper;
1941    ppc_avr_t result;
1942    int sat = 0;
1943
1944    upper = 1;
1945    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1946        int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1947
1948        result.VsrD(i) = 0;
1949        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1950            t += a->VsrSW(2 * i + j);
1951        }
1952        result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1953    }
1954
1955    *r = result;
1956    if (sat) {
1957        set_vscr_sat(env);
1958    }
1959}
1960
1961void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1962{
1963    int i, j;
1964    int sat = 0;
1965
1966    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1967        int64_t t = (int64_t)b->s32[i];
1968
1969        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1970            t += a->s8[4 * i + j];
1971        }
1972        r->s32[i] = cvtsdsw(t, &sat);
1973    }
1974
1975    if (sat) {
1976        set_vscr_sat(env);
1977    }
1978}
1979
1980void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1981{
1982    int sat = 0;
1983    int i;
1984
1985    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1986        int64_t t = (int64_t)b->s32[i];
1987
1988        t += a->s16[2 * i] + a->s16[2 * i + 1];
1989        r->s32[i] = cvtsdsw(t, &sat);
1990    }
1991
1992    if (sat) {
1993        set_vscr_sat(env);
1994    }
1995}
1996
1997void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1998{
1999    int i, j;
2000    int sat = 0;
2001
2002    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2003        uint64_t t = (uint64_t)b->u32[i];
2004
2005        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2006            t += a->u8[4 * i + j];
2007        }
2008        r->u32[i] = cvtuduw(t, &sat);
2009    }
2010
2011    if (sat) {
2012        set_vscr_sat(env);
2013    }
2014}
2015
2016#if HOST_BIG_ENDIAN
2017#define UPKHI 1
2018#define UPKLO 0
2019#else
2020#define UPKHI 0
2021#define UPKLO 1
2022#endif
2023#define VUPKPX(suffix, hi)                                              \
2024    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
2025    {                                                                   \
2026        int i;                                                          \
2027        ppc_avr_t result;                                               \
2028                                                                        \
2029        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
2030            uint16_t e = b->u16[hi ? i : i + 4];                        \
2031            uint8_t a = (e >> 15) ? 0xff : 0;                           \
2032            uint8_t r = (e >> 10) & 0x1f;                               \
2033            uint8_t g = (e >> 5) & 0x1f;                                \
2034            uint8_t b = e & 0x1f;                                       \
2035                                                                        \
2036            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
2037        }                                                               \
2038        *r = result;                                                    \
2039    }
2040VUPKPX(lpx, UPKLO)
2041VUPKPX(hpx, UPKHI)
2042#undef VUPKPX
2043
2044#define VUPK(suffix, unpacked, packee, hi)                              \
2045    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
2046    {                                                                   \
2047        int i;                                                          \
2048        ppc_avr_t result;                                               \
2049                                                                        \
2050        if (hi) {                                                       \
2051            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
2052                result.unpacked[i] = b->packee[i];                      \
2053            }                                                           \
2054        } else {                                                        \
2055            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2056                 i++) {                                                 \
2057                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2058            }                                                           \
2059        }                                                               \
2060        *r = result;                                                    \
2061    }
2062VUPK(hsb, s16, s8, UPKHI)
2063VUPK(hsh, s32, s16, UPKHI)
2064VUPK(hsw, s64, s32, UPKHI)
2065VUPK(lsb, s16, s8, UPKLO)
2066VUPK(lsh, s32, s16, UPKLO)
2067VUPK(lsw, s64, s32, UPKLO)
2068#undef VUPK
2069#undef UPKHI
2070#undef UPKLO
2071
2072#define VGENERIC_DO(name, element)                                      \
2073    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
2074    {                                                                   \
2075        int i;                                                          \
2076                                                                        \
2077        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
2078            r->element[i] = name(b->element[i]);                        \
2079        }                                                               \
2080    }
2081
2082#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2083#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2084
2085VGENERIC_DO(clzb, u8)
2086VGENERIC_DO(clzh, u16)
2087
2088#undef clzb
2089#undef clzh
2090
2091#define ctzb(v) ((v) ? ctz32(v) : 8)
2092#define ctzh(v) ((v) ? ctz32(v) : 16)
2093#define ctzw(v) ctz32((v))
2094#define ctzd(v) ctz64((v))
2095
2096VGENERIC_DO(ctzb, u8)
2097VGENERIC_DO(ctzh, u16)
2098VGENERIC_DO(ctzw, u32)
2099VGENERIC_DO(ctzd, u64)
2100
2101#undef ctzb
2102#undef ctzh
2103#undef ctzw
2104#undef ctzd
2105
2106#define popcntb(v) ctpop8(v)
2107#define popcnth(v) ctpop16(v)
2108#define popcntw(v) ctpop32(v)
2109#define popcntd(v) ctpop64(v)
2110
2111VGENERIC_DO(popcntb, u8)
2112VGENERIC_DO(popcnth, u16)
2113VGENERIC_DO(popcntw, u32)
2114VGENERIC_DO(popcntd, u64)
2115
2116#undef popcntb
2117#undef popcnth
2118#undef popcntw
2119#undef popcntd
2120
2121#undef VGENERIC_DO
2122
2123void helper_VADDUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2124{
2125    r->s128 = int128_add(a->s128, b->s128);
2126}
2127
2128void helper_VADDEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2129{
2130    r->s128 = int128_add(int128_add(a->s128, b->s128),
2131                         int128_make64(int128_getlo(c->s128) & 1));
2132}
2133
2134void helper_VADDCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2135{
2136    r->VsrD(1) = int128_ult(int128_not(a->s128), b->s128);
2137    r->VsrD(0) = 0;
2138}
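
/*
 * The carry-out test above relies on the identity: a + b carries out of
 * 128 bits iff a + b > 2^128 - 1, i.e. iff b > (2^128 - 1) - a = ~a.
 * The extended variant below additionally produces a carry when a + b
 * equals 2^128 - 1 and the carry-in is set; the subtract helpers reuse the
 * same identities with b replaced by ~b (subtraction as a + ~b + carry).
 */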
2139
2140void helper_VADDECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2141{
2142    bool carry_out = int128_ult(int128_not(a->s128), b->s128),
2143         carry_in = int128_getlo(c->s128) & 1;
2144
2145    if (!carry_out && carry_in) {
2146        carry_out = (int128_nz(a->s128) || int128_nz(b->s128)) &&
2147                    int128_eq(int128_add(a->s128, b->s128), int128_makes64(-1));
2148    }
2149
2150    r->VsrD(0) = 0;
2151    r->VsrD(1) = carry_out;
2152}
2153
2154void helper_VSUBUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2155{
2156    r->s128 = int128_sub(a->s128, b->s128);
2157}
2158
2159void helper_VSUBEUQM(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2160{
2161    r->s128 = int128_add(int128_add(a->s128, int128_not(b->s128)),
2162                         int128_make64(int128_getlo(c->s128) & 1));
2163}
2164
2165void helper_VSUBCUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2166{
2167    Int128 tmp = int128_not(b->s128);
2168
2169    r->VsrD(1) = int128_ult(int128_not(a->s128), tmp) ||
2170                 int128_eq(int128_add(a->s128, tmp), int128_makes64(-1));
2171    r->VsrD(0) = 0;
2172}
2173
2174void helper_VSUBECUQ(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2175{
2176    Int128 tmp = int128_not(b->s128);
2177    bool carry_out = int128_ult(int128_not(a->s128), tmp),
2178         carry_in = int128_getlo(c->s128) & 1;
2179
2180    r->VsrD(1) = carry_out || (carry_in && int128_eq(int128_add(a->s128, tmp),
2181                                                     int128_makes64(-1)));
2182    r->VsrD(0) = 0;
2183}
2184
2185#define BCD_PLUS_PREF_1 0xC
2186#define BCD_PLUS_PREF_2 0xF
2187#define BCD_PLUS_ALT_1  0xA
2188#define BCD_NEG_PREF    0xD
2189#define BCD_NEG_ALT     0xB
2190#define BCD_PLUS_ALT_2  0xE
2191#define NATIONAL_PLUS   0x2B
2192#define NATIONAL_NEG    0x2D
2193
2194#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2195
2196static int bcd_get_sgn(ppc_avr_t *bcd)
2197{
2198    switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2199    case BCD_PLUS_PREF_1:
2200    case BCD_PLUS_PREF_2:
2201    case BCD_PLUS_ALT_1:
2202    case BCD_PLUS_ALT_2:
2203    {
2204        return 1;
2205    }
2206
2207    case BCD_NEG_PREF:
2208    case BCD_NEG_ALT:
2209    {
2210        return -1;
2211    }
2212
2213    default:
2214    {
2215        return 0;
2216    }
2217    }
2218}
2219
2220static int bcd_preferred_sgn(int sgn, int ps)
2221{
2222    if (sgn >= 0) {
2223        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2224    } else {
2225        return BCD_NEG_PREF;
2226    }
2227}
2228
2229static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2230{
2231    uint8_t result;
2232    if (n & 1) {
2233        result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2234    } else {
2235        result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2236    }
2237
2238    if (unlikely(result > 9)) {
2239        *invalid = true;
2240    }
2241    return result;
2242}
2243
2244static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2245{
2246    if (n & 1) {
2247        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2248        bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2249    } else {
2250        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2251        bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2252    }
2253}
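
/*
 * Layout sketch: the sign code occupies the least significant nibble and
 * digits 1..31 occupy increasingly significant nibbles, so building +123
 * with the accessors above,
 *
 *     ppc_avr_t v = { .u64 = { 0, 0 } };
 *     bcd_put_digit(&v, 3, 1);
 *     bcd_put_digit(&v, 2, 2);
 *     bcd_put_digit(&v, 1, 3);
 *     bcd_put_digit(&v, BCD_PLUS_PREF_1, 0);
 *
 * leaves v.VsrD(1) == 0x123C and v.VsrD(0) == 0.
 */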
2254
2255static bool bcd_is_valid(ppc_avr_t *bcd)
2256{
2257    int i;
2258    int invalid = 0;
2259
2260    if (bcd_get_sgn(bcd) == 0) {
2261        return false;
2262    }
2263
2264    for (i = 1; i < 32; i++) {
2265        bcd_get_digit(bcd, i, &invalid);
2266        if (unlikely(invalid)) {
2267            return false;
2268        }
2269    }
2270    return true;
2271}
2272
2273static int bcd_cmp_zero(ppc_avr_t *bcd)
2274{
2275    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2276        return CRF_EQ;
2277    } else {
2278        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2279    }
2280}
2281
2282static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2283{
2284    return reg->VsrH(7 - n);
2285}
2286
2287static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2288{
2289    reg->VsrH(7 - n) = val;
2290}
2291
2292static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2293{
2294    int i;
2295    int invalid = 0;
2296    for (i = 31; i > 0; i--) {
2297        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2298        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2299        if (unlikely(invalid)) {
2300            return 0; /* doesn't matter */
2301        } else if (dig_a > dig_b) {
2302            return 1;
2303        } else if (dig_a < dig_b) {
2304            return -1;
2305        }
2306    }
2307
2308    return 0;
2309}
2310
2311static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2312                       int *overflow)
2313{
2314    int carry = 0;
2315    int i;
2316    int is_zero = 1;
2317
2318    for (i = 1; i <= 31; i++) {
2319        uint8_t digit = bcd_get_digit(a, i, invalid) +
2320                        bcd_get_digit(b, i, invalid) + carry;
2321        is_zero &= (digit == 0);
2322        if (digit > 9) {
2323            carry = 1;
2324            digit -= 10;
2325        } else {
2326            carry = 0;
2327        }
2328
2329        bcd_put_digit(t, digit, i);
2330    }
2331
2332    *overflow = carry;
2333    return is_zero;
2334}
2335
2336static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2337                       int *overflow)
2338{
2339    int carry = 0;
2340    int i;
2341
2342    for (i = 1; i <= 31; i++) {
2343        uint8_t digit = bcd_get_digit(a, i, invalid) -
2344                        bcd_get_digit(b, i, invalid) + carry;
2345        if (digit & 0x80) {
2346            carry = -1;
2347            digit += 10;
2348        } else {
2349            carry = 0;
2350        }
2351
2352        bcd_put_digit(t, digit, i);
2353    }
2354
2355    *overflow = carry;
2356}
2357
2358uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2359{
2360
2361    int sgna = bcd_get_sgn(a);
2362    int sgnb = bcd_get_sgn(b);
2363    int invalid = (sgna == 0) || (sgnb == 0);
2364    int overflow = 0;
2365    int zero = 0;
2366    uint32_t cr = 0;
2367    ppc_avr_t result = { .u64 = { 0, 0 } };
2368
2369    if (!invalid) {
2370        if (sgna == sgnb) {
2371            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2372            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2373            cr = (sgna > 0) ? CRF_GT : CRF_LT;
2374        } else {
2375            int magnitude = bcd_cmp_mag(a, b);
2376            if (magnitude > 0) {
2377                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2378                bcd_sub_mag(&result, a, b, &invalid, &overflow);
2379                cr = (sgna > 0) ? CRF_GT : CRF_LT;
2380            } else if (magnitude < 0) {
2381                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2382                bcd_sub_mag(&result, b, a, &invalid, &overflow);
2383                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2384            } else {
2385                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2386                cr = CRF_EQ;
2387            }
2388        }
2389    }
2390
2391    if (unlikely(invalid)) {
2392        result.VsrD(0) = result.VsrD(1) = -1;
2393        cr = CRF_SO;
2394    } else if (overflow) {
2395        cr |= CRF_SO;
2396    } else if (zero) {
2397        cr |= CRF_EQ;
2398    }
2399
2400    *r = result;
2401
2402    return cr;
2403}
2404
2405uint32_t helper_bcdsub(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2406{
2407    ppc_avr_t bcopy = *b;
2408    int sgnb = bcd_get_sgn(b);
2409    if (sgnb < 0) {
2410        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2411    } else if (sgnb > 0) {
2412        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2413    }
2414    /* else invalid ... defer to bcdadd code for proper handling */
2415
2416    return helper_bcdadd(r, a, &bcopy, ps);
2417}
2418
2419uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2420{
2421    int i;
2422    int cr = 0;
2423    uint16_t national = 0;
2424    uint16_t sgnb = get_national_digit(b, 0);
2425    ppc_avr_t ret = { .u64 = { 0, 0 } };
2426    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2427
2428    for (i = 1; i < 8; i++) {
2429        national = get_national_digit(b, i);
2430        if (unlikely(national < 0x30 || national > 0x39)) {
2431            invalid = 1;
2432            break;
2433        }
2434
2435        bcd_put_digit(&ret, national & 0xf, i);
2436    }
2437
2438    if (sgnb == NATIONAL_PLUS) {
2439        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2440    } else {
2441        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2442    }
2443
2444    cr = bcd_cmp_zero(&ret);
2445
2446    if (unlikely(invalid)) {
2447        cr = CRF_SO;
2448    }
2449
2450    *r = ret;
2451
2452    return cr;
2453}
2454
2455uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2456{
2457    int i;
2458    int cr = 0;
2459    int sgnb = bcd_get_sgn(b);
2460    int invalid = (sgnb == 0);
2461    ppc_avr_t ret = { .u64 = { 0, 0 } };
2462
2463    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2464
2465    for (i = 1; i < 8; i++) {
2466        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2467
2468        if (unlikely(invalid)) {
2469            break;
2470        }
2471    }
2472    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2473
2474    cr = bcd_cmp_zero(b);
2475
2476    if (ox_flag) {
2477        cr |= CRF_SO;
2478    }
2479
2480    if (unlikely(invalid)) {
2481        cr = CRF_SO;
2482    }
2483
2484    *r = ret;
2485
2486    return cr;
2487}
2488
2489uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2490{
2491    int i;
2492    int cr = 0;
2493    int invalid = 0;
2494    int zone_digit = 0;
2495    int zone_lead = ps ? 0xF : 0x3;
2496    int digit = 0;
2497    ppc_avr_t ret = { .u64 = { 0, 0 } };
2498    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2499
2500    if (unlikely((sgnb < 0xA) && ps)) {
2501        invalid = 1;
2502    }
2503
2504    for (i = 0; i < 16; i++) {
2505        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2506        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2507        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2508            invalid = 1;
2509            break;
2510        }
2511
2512        bcd_put_digit(&ret, digit, i + 1);
2513    }
2514
2515    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2516            (!ps && (sgnb & 0x4))) {
2517        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2518    } else {
2519        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2520    }
2521
2522    cr = bcd_cmp_zero(&ret);
2523
2524    if (unlikely(invalid)) {
2525        cr = CRF_SO;
2526    }
2527
2528    *r = ret;
2529
2530    return cr;
2531}
2532
2533uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2534{
2535    int i;
2536    int cr = 0;
2537    uint8_t digit = 0;
2538    int sgnb = bcd_get_sgn(b);
2539    int zone_lead = (ps) ? 0xF0 : 0x30;
2540    int invalid = (sgnb == 0);
2541    ppc_avr_t ret = { .u64 = { 0, 0 } };
2542
2543    int ox_flag = ((b->VsrD(0) >> 4) != 0);
2544
2545    for (i = 0; i < 16; i++) {
2546        digit = bcd_get_digit(b, i + 1, &invalid);
2547
2548        if (unlikely(invalid)) {
2549            break;
2550        }
2551
2552        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2553    }
2554
2555    if (ps) {
2556        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2557    } else {
2558        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2559    }
2560
2561    cr = bcd_cmp_zero(b);
2562
2563    if (ox_flag) {
2564        cr |= CRF_SO;
2565    }
2566
2567    if (unlikely(invalid)) {
2568        cr = CRF_SO;
2569    }
2570
2571    *r = ret;
2572
2573    return cr;
2574}
2575
2576/**
2577 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2578 *
2579 * Returns:
2580 * > 0 if ahi|alo > bhi|blo,
2581 * 0 if ahi|alo == bhi|blo,
2582 * < 0 if ahi|alo < bhi|blo
2583 */
2584static inline int ucmp128(uint64_t alo, uint64_t ahi,
2585                          uint64_t blo, uint64_t bhi)
2586{
2587    return (ahi == bhi) ?
2588        (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2589        (ahi > bhi ? 1 : -1);
2590}
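
/*
 * Note the lo/hi argument order: ucmp128(0, 1, UINT64_MAX, 0) > 0, because
 * the high doublewords (1 vs 0) decide; the low doublewords only break
 * ties, as in the range check in helper_bcdcfsq below.
 */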
2591
2592uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2593{
2594    int i;
2595    int cr;
2596    uint64_t lo_value;
2597    uint64_t hi_value;
2598    uint64_t rem;
2599    ppc_avr_t ret = { .u64 = { 0, 0 } };
2600
2601    if (b->VsrSD(0) < 0) {
2602        lo_value = -b->VsrSD(1);
2603        hi_value = ~b->VsrD(0) + !lo_value;
2604        bcd_put_digit(&ret, 0xD, 0);
2605
2606        cr = CRF_LT;
2607    } else {
2608        lo_value = b->VsrD(1);
2609        hi_value = b->VsrD(0);
2610        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2611
2612        if (hi_value == 0 && lo_value == 0) {
2613            cr = CRF_EQ;
2614        } else {
2615            cr = CRF_GT;
2616        }
2617    }
2618
2619    /*
2620     * Check src limits: abs(src) <= 10^31 - 1
2621     *
2622     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2623     */
2624    if (ucmp128(lo_value, hi_value,
2625                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2626        cr |= CRF_SO;
2627
2628        /*
2629         * According to the ISA, if src wouldn't fit in the destination
2630         * register, the result is undefined.
2631         * In that case, we leave r unchanged.
2632         */
2633    } else {
2634        rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2635
2636        for (i = 1; i < 16; rem /= 10, i++) {
2637            bcd_put_digit(&ret, rem % 10, i);
2638        }
2639
2640        for (; i < 32; lo_value /= 10, i++) {
2641            bcd_put_digit(&ret, lo_value % 10, i);
2642        }
2643
2644        *r = ret;
2645    }
2646
2647    return cr;
2648}
2649
2650uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2651{
2652    uint8_t i;
2653    int cr;
2654    uint64_t carry;
2655    uint64_t unused;
2656    uint64_t lo_value;
2657    uint64_t hi_value = 0;
2658    int sgnb = bcd_get_sgn(b);
2659    int invalid = (sgnb == 0);
2660
2661    lo_value = bcd_get_digit(b, 31, &invalid);
2662    for (i = 30; i > 0; i--) {
2663        mulu64(&lo_value, &carry, lo_value, 10ULL);
2664        mulu64(&hi_value, &unused, hi_value, 10ULL);
2665        lo_value += bcd_get_digit(b, i, &invalid);
2666        hi_value += carry;
2667
2668        if (unlikely(invalid)) {
2669            break;
2670        }
2671    }
2672
2673    if (sgnb == -1) {
2674        r->VsrSD(1) = -lo_value;
2675        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2676    } else {
2677        r->VsrSD(1) = lo_value;
2678        r->VsrSD(0) = hi_value;
2679    }
2680
2681    cr = bcd_cmp_zero(b);
2682
2683    if (unlikely(invalid)) {
2684        cr = CRF_SO;
2685    }
2686
2687    return cr;
2688}
2689
2690uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2691{
2692    int i;
2693    int invalid = 0;
2694
2695    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2696        return CRF_SO;
2697    }
2698
2699    *r = *a;
2700    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2701
2702    for (i = 1; i < 32; i++) {
2703        bcd_get_digit(a, i, &invalid);
2704        bcd_get_digit(b, i, &invalid);
2705        if (unlikely(invalid)) {
2706            return CRF_SO;
2707        }
2708    }
2709
2710    return bcd_cmp_zero(r);
2711}
2712
2713uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2714{
2715    int sgnb = bcd_get_sgn(b);
2716
2717    *r = *b;
2718    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2719
2720    if (bcd_is_valid(b) == false) {
2721        return CRF_SO;
2722    }
2723
2724    return bcd_cmp_zero(r);
2725}
2726
2727uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2728{
2729    int cr;
2730    int i = a->VsrSB(7);
2731    bool ox_flag = false;
2732    int sgnb = bcd_get_sgn(b);
2733    ppc_avr_t ret = *b;
2734    ret.VsrD(1) &= ~0xf;
2735
2736    if (bcd_is_valid(b) == false) {
2737        return CRF_SO;
2738    }
2739
2740    if (unlikely(i > 31)) {
2741        i = 31;
2742    } else if (unlikely(i < -31)) {
2743        i = -31;
2744    }
2745
2746    if (i > 0) {
2747        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2748    } else {
2749        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2750    }
2751    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2752
2753    *r = ret;
2754
2755    cr = bcd_cmp_zero(r);
2756    if (ox_flag) {
2757        cr |= CRF_SO;
2758    }
2759
2760    return cr;
2761}
2762
2763uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2764{
2765    int cr;
2766    int i;
2767    int invalid = 0;
2768    bool ox_flag = false;
2769    ppc_avr_t ret = *b;
2770
2771    for (i = 0; i < 32; i++) {
2772        bcd_get_digit(b, i, &invalid);
2773
2774        if (unlikely(invalid)) {
2775            return CRF_SO;
2776        }
2777    }
2778
2779    i = a->VsrSB(7);
2780    if (i >= 32) {
2781        ox_flag = true;
2782        ret.VsrD(1) = ret.VsrD(0) = 0;
2783    } else if (i <= -32) {
2784        ret.VsrD(1) = ret.VsrD(0) = 0;
2785    } else if (i > 0) {
2786        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2787    } else {
2788        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2789    }
2790    *r = ret;
2791
2792    cr = bcd_cmp_zero(r);
2793    if (ox_flag) {
2794        cr |= CRF_SO;
2795    }
2796
2797    return cr;
2798}
2799
2800uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2801{
2802    int cr;
2803    int unused = 0;
2804    int invalid = 0;
2805    bool ox_flag = false;
2806    int sgnb = bcd_get_sgn(b);
2807    ppc_avr_t ret = *b;
2808    ret.VsrD(1) &= ~0xf;
2809
2810    int i = a->VsrSB(7);
2811    ppc_avr_t bcd_one;
2812
2813    bcd_one.VsrD(0) = 0;
2814    bcd_one.VsrD(1) = 0x10;
2815
2816    if (bcd_is_valid(b) == false) {
2817        return CRF_SO;
2818    }
2819
2820    if (unlikely(i > 31)) {
2821        i = 31;
2822    } else if (unlikely(i < -31)) {
2823        i = -31;
2824    }
2825
2826    if (i > 0) {
2827        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2828    } else {
2829        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2830
2831        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2832            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2833        }
2834    }
2835    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2836
2837    cr = bcd_cmp_zero(&ret);
2838    if (ox_flag) {
2839        cr |= CRF_SO;
2840    }
2841    *r = ret;
2842
2843    return cr;
2844}
2845
2846uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2847{
2848    uint64_t mask;
2849    uint32_t ox_flag = 0;
2850    int i = a->VsrSH(3) + 1;
2851    ppc_avr_t ret = *b;
2852
2853    if (bcd_is_valid(b) == false) {
2854        return CRF_SO;
2855    }
2856
2857    if (i > 16 && i < 32) {
2858        mask = (uint64_t)-1 >> (128 - i * 4);
2859        if (ret.VsrD(0) & ~mask) {
2860            ox_flag = CRF_SO;
2861        }
2862
2863        ret.VsrD(0) &= mask;
2864    } else if (i >= 0 && i <= 16) {
2865        mask = (uint64_t)-1 >> (64 - i * 4);
2866        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2867            ox_flag = CRF_SO;
2868        }
2869
2870        ret.VsrD(1) &= mask;
2871        ret.VsrD(0) = 0;
2872    }
2873    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2874    *r = ret;
2875
2876    return bcd_cmp_zero(&ret) | ox_flag;
2877}
2878
2879uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2880{
2881    int i;
2882    uint64_t mask;
2883    uint32_t ox_flag = 0;
2884    int invalid = 0;
2885    ppc_avr_t ret = *b;
2886
2887    for (i = 0; i < 32; i++) {
2888        bcd_get_digit(b, i, &invalid);
2889
2890        if (unlikely(invalid)) {
2891            return CRF_SO;
2892        }
2893    }
2894
2895    i = a->VsrSH(3);
2896    if (i > 16 && i < 33) {
2897        mask = (uint64_t)-1 >> (128 - i * 4);
2898        if (ret.VsrD(0) & ~mask) {
2899            ox_flag = CRF_SO;
2900        }
2901
2902        ret.VsrD(0) &= mask;
2903    } else if (i > 0 && i <= 16) {
2904        mask = (uint64_t)-1 >> (64 - i * 4);
2905        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2906            ox_flag = CRF_SO;
2907        }
2908
2909        ret.VsrD(1) &= mask;
2910        ret.VsrD(0) = 0;
2911    } else if (i == 0) {
2912        if (ret.VsrD(0) || ret.VsrD(1)) {
2913            ox_flag = CRF_SO;
2914        }
2915        ret.VsrD(0) = ret.VsrD(1) = 0;
2916    }
2917
2918    *r = ret;
2919    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2920        return ox_flag | CRF_EQ;
2921    }
2922
2923    return ox_flag | CRF_GT;
2924}
2925
2926void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2927{
2928    int i;
2929    VECTOR_FOR_INORDER_I(i, u8) {
2930        r->u8[i] = AES_sbox[a->u8[i]];
2931    }
2932}
2933
2934void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2935{
2936    AESState *ad = (AESState *)r;
2937    AESState *st = (AESState *)a;
2938    AESState *rk = (AESState *)b;
2939
2940    aesenc_SB_SR_MC_AK(ad, st, rk, true);
2941}
2942
2943void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2944{
2945    aesenc_SB_SR_AK((AESState *)r, (AESState *)a, (AESState *)b, true);
2946}
2947
2948void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2949{
2950    AESState *ad = (AESState *)r;
2951    AESState *st = (AESState *)a;
2952    AESState *rk = (AESState *)b;
2953
2954    aesdec_ISB_ISR_AK_IMC(ad, st, rk, true);
2955}
2956
2957void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2958{
2959    aesdec_ISB_ISR_AK((AESState *)r, (AESState *)a, (AESState *)b, true);
2960}
2961
2962void helper_vshasigmaw(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
2963{
2964    int st = (st_six & 0x10) != 0;
2965    int six = st_six & 0xF;
2966    int i;
2967
2968    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2969        if (st == 0) {
2970            if ((six & (0x8 >> i)) == 0) {
2971                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2972                             ror32(a->VsrW(i), 18) ^
2973                             (a->VsrW(i) >> 3);
2974            } else { /* six.bit[i] == 1 */
2975                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2976                             ror32(a->VsrW(i), 19) ^
2977                             (a->VsrW(i) >> 10);
2978            }
2979        } else { /* st == 1 */
2980            if ((six & (0x8 >> i)) == 0) {
2981                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2982                             ror32(a->VsrW(i), 13) ^
2983                             ror32(a->VsrW(i), 22);
2984            } else { /* six.bit[i] == 1 */
2985                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2986                             ror32(a->VsrW(i), 11) ^
2987                             ror32(a->VsrW(i), 25);
2988            }
2989        }
2990    }
2991}
2992
2993void helper_vshasigmad(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
2994{
2995    int st = (st_six & 0x10) != 0;
2996    int six = st_six & 0xF;
2997    int i;
2998
2999    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
3000        if (st == 0) {
3001            if ((six & (0x8 >> (2 * i))) == 0) {
3002                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
3003                             ror64(a->VsrD(i), 8) ^
3004                             (a->VsrD(i) >> 7);
3005            } else { /* six.bit[2*i] == 1 */
3006                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
3007                             ror64(a->VsrD(i), 61) ^
3008                             (a->VsrD(i) >> 6);
3009            }
3010        } else { /* st == 1 */
3011            if ((six & (0x8 >> (2 * i))) == 0) {
3012                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3013                             ror64(a->VsrD(i), 34) ^
3014                             ror64(a->VsrD(i), 39);
3015            } else { /* six.bit[2*i] == 1 */
3016                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3017                             ror64(a->VsrD(i), 18) ^
3018                             ror64(a->VsrD(i), 41);
3019            }
3020        }
3021    }
3022}
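
/*
 * Both helpers above implement the FIPS 180-4 sigma functions element-wise:
 * st = 0 selects the message-schedule functions (sigma0/sigma1) and st = 1
 * the compression functions (Sigma0/Sigma1), with the per-element "six" bit
 * choosing between the 0 and 1 variants.  For SHA-256, for instance,
 * sigma0(x) = ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3), the first case of
 * helper_vshasigmaw.
 */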
3023
3024void helper_vpermxor(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3025{
3026    ppc_avr_t result;
3027    int i;
3028
3029    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3030        int indexA = c->VsrB(i) >> 4;
3031        int indexB = c->VsrB(i) & 0xF;
3032
3033        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
3034    }
3035    *r = result;
3036}
3037
3038#undef VECTOR_FOR_INORDER_I
3039
3040/*****************************************************************************/
3041/* SPE extension helpers */
3042/* Use a table to make this quicker */
3043static const uint8_t hbrev[16] = {
3044    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3045    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3046};
3047
3048static inline uint8_t byte_reverse(uint8_t val)
3049{
3050    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3051}
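
/*
 * For example, byte_reverse(0xA3) == 0xC5: the high nibble 0xA reverses to
 * 0x5, the low nibble 0x3 reverses to 0xC, and the two nibbles swap places.
 */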
3052
3053static inline uint32_t word_reverse(uint32_t val)
3054{
3055    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3056        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3057}
3058
3059#define MASKBITS 16 /* Arbitrary value, to be fixed (implementation dependent) */
3060target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3061{
3062    uint32_t a, b, d, mask;
3063
3064    mask = UINT32_MAX >> (32 - MASKBITS);
3065    a = arg1 & mask;
3066    b = arg2 & mask;
3067    d = word_reverse(1 + word_reverse(a | ~b));
3068    return (arg1 & ~mask) | (d & b);
3069}
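
/*
 * brinc produces the next index of a bit-reversed counter constrained to
 * the mask bits of arg2.  For example, helper_brinc(4, 7) == 2: within the
 * three mask bits, 0b100 reverses to 0b001, increments to 0b010, and
 * reverses back to 0b010.
 */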
3070
3071uint32_t helper_cntlsw32(uint32_t val)
3072{
3073    if (val & 0x80000000) {
3074        return clz32(~val);
3075    } else {
3076        return clz32(val);
3077    }
3078}
3079
3080uint32_t helper_cntlzw32(uint32_t val)
3081{
3082    return clz32(val);
3083}
3084
3085/* 440 specific */
3086target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3087                          target_ulong low, uint32_t update_Rc)
3088{
3089    target_ulong mask;
3090    int i;
3091
3092    i = 1;
3093    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3094        if ((high & mask) == 0) {
3095            if (update_Rc) {
3096                env->crf[0] = 0x4;
3097            }
3098            goto done;
3099        }
3100        i++;
3101    }
3102    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3103        if ((low & mask) == 0) {
3104            if (update_Rc) {
3105                env->crf[0] = 0x8;
3106            }
3107            goto done;
3108        }
3109        i++;
3110    }
3111    i = 8;
3112    if (update_Rc) {
3113        env->crf[0] = 0x2;
3114    }
3115 done:
3116    env->xer = (env->xer & ~0x7F) | i;
3117    if (update_Rc) {
3118        env->crf[0] |= xer_so;
3119    }
3120    return i;
3121}
3122