qemu/target/ppc/int_helper.c
/*
 *  PowerPC integer and vector emulation helpers for QEMU.
 *
 *  Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "qemu/log.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    if (unlikely(rb == 0 || ra >= rb)) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divu128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    uint64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = 0;

    if (unlikely(rb == 0 || uabs64(ra) >= uabs64(rb))) {
        overflow = 1;
        rt = 0; /* Undefined */
    } else {
        divs128(&rt, &ra, rb);
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * Subtract 1 from each byte, AND with the inverse, and check whether the
 * MSB is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

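/*
 * Illustrative sketch of the SWAR byte-match trick used above (a
 * hypothetical stand-alone equivalent of hasvalue(); kept out of the
 * build with #if 0).
 */
#if 0
static bool byte_present(uint64_t word, uint8_t byte)
{
    /* Replicate 'byte' into all eight lanes, e.g. 0x42 -> 0x4242424242424242 */
    uint64_t pat = (uint64_t)byte * (~(uint64_t)0 / 0xff);
    /* XOR zeroes exactly the lanes that match 'byte'... */
    uint64_t v = word ^ pat;
    /* ...and the haszero() trick flags any all-zero lane via its MSB */
    return ((v - 0x0101010101010101ULL) & ~v & 0x8080808080808080ULL) != 0;
}
#endif
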
/*
 * Return a random number.
 */
uint64_t helper_darn32(void)
{
    Error *err = NULL;
    uint32_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_darn64(void)
{
    Error *err = NULL;
    uint64_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

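/*
 * A minimal sketch of the carry-out rule implemented above (illustration
 * only, not built).  CA/CA32 are set exactly when a negative value loses
 * non-zero bits to the shift.
 */
#if 0
static int sraw_carry(int32_t value, int shift)
{
    /* e.g. value = -5, shift = 1: result is -3 and carry is 1 */
    return value < 0 && (value & ((1 << shift) - 1)) != 0;
}
#endif
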
#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words.  */
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    return val;
}
#endif

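/*
 * Worked example of the per-byte folding above (illustration only, not
 * built).  After the three folding steps each byte holds its own
 * population count, which is what popcntb returns.
 */
#if 0
static void popcntb_example(void)
{
    /* byte 1 (0xff) counts 8 ones, byte 0 (0x03) counts 2 ones */
    target_ulong r = helper_popcntb(0xff03);  /* r == 0x0802 */
    (void)r;
}
#endif
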
uint64_t helper_CFUGED(uint64_t src, uint64_t mask)
{
    /*
     * Instead of processing the mask bit-by-bit from the most significant to
     * the least significant bit, as described in PowerISA, we'll handle it in
     * blocks of 'n' zeros/ones from LSB to MSB. To avoid the decision to use
     * ctz or cto, we negate the mask at the end of the loop.
     */
    target_ulong m, left = 0, right = 0;
    unsigned int n, i = 64;
    bool bit = false; /* tracks if we are processing zeros or ones */

    if (mask == 0 || mask == -1) {
        return src;
    }

    /* Processes the mask in blocks, from LSB to MSB */
    while (i) {
        /* Find how many bits we should take */
        n = ctz64(mask);
        if (n > i) {
            n = i;
        }

        /*
         * Extracts 'n' trailing bits of src and puts them on the leading 'n'
         * bits of 'right' or 'left', pushing down the previously extracted
         * values.
         */
        m = (1ll << n) - 1;
        if (bit) {
            right = ror64(right | (src & m), n);
        } else {
            left = ror64(left | (src & m), n);
        }

        /*
         * Discards the processed bits from 'src' and 'mask'. Note that we are
         * removing 'n' trailing zeros from 'mask', but the logical shift will
         * add 'n' leading zeros back, so the population count of 'mask' is kept
         * the same.
         */
        src >>= n;
        mask >>= n;
        i -= n;
        bit = !bit;
        mask = ~mask;
    }

    /*
     * At the end, 'right' was rotated right ctpop(mask) times.  To put it
     * back in place, we shift it right another 64 - ctpop(mask) bits.
     */
    if (bit) {
        n = ctpop64(mask);
    } else {
        n = 64 - ctpop64(mask);
    }

    return left | (right >> n);
}

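/*
 * Centrifuge semantics in one concrete case (illustration only, not
 * built): bits of src selected by mask=1 gather at the low end, bits
 * selected by mask=0 gather above them, both keeping their order.
 */
#if 0
static void cfuged_example(void)
{
    /* src = 0xb (1011), mask = 0x5 (0101):                       */
    /*   mask=1 picks src bits 2,0 -> "01" -> low part            */
    /*   mask=0 picks src bits 3,1 (plus zeros) -> "11" above it  */
    uint64_t r = helper_CFUGED(0xb, 0x5);  /* r == 0xd (1101) */
    (void)r;
}
#endif
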
uint64_t helper_PDEPD(uint64_t src, uint64_t mask)
{
    int i, o;
    uint64_t result = 0;

    if (mask == -1) {
        return src;
    }

    for (i = 0; mask != 0; i++) {
        o = ctz64(mask);
        mask &= mask - 1;
        result |= ((src >> i) & 1) << o;
    }

    return result;
}

uint64_t helper_PEXTD(uint64_t src, uint64_t mask)
{
    int i, o;
    uint64_t result = 0;

    if (mask == -1) {
        return src;
    }

    for (o = 0; mask != 0; o++) {
        i = ctz64(mask);
        mask &= mask - 1;
        result |= ((src >> i) & 1) << o;
    }

    return result;
}

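/*
 * Hypothetical round-trip showing how the two helpers relate
 * (illustration only, not built): PDEPD scatters the low bits of src
 * into the mask's set-bit positions, and PEXTD gathers them back.
 */
#if 0
static void pdep_pext_example(void)
{
    uint64_t mask = 0xf0f0;
    uint64_t dep = helper_PDEPD(0xab, mask);  /* dep == 0xa0b0 */
    uint64_t ext = helper_PEXTD(dep, mask);   /* ext == 0xab   */
    (void)ext;
}
#endif
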
/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the craziest instruction ever seen, imho! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *     return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c.inc"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

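/*
 * How one table entry could be generated (a hypothetical build-time
 * sketch, not built; QEMU ships the precomputed mfrom_table.c.inc
 * instead).
 */
#if 0
#include <math.h>

static uint32_t mfrom_entry(int arg) /* 0 <= arg < 602 */
{
    return (uint32_t)(256.0 * log10(pow(10.0, -arg / 256.0) + 1.0) + 0.5);
}
#endif
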
/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

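/*
 * What one generated converter looks like after macro expansion, here
 * SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX) (a hand-expanded
 * illustration, not built).
 */
#if 0
static inline int32_t cvtsdsw_expanded(int64_t x, int *sat)
{
    int32_t r;

    if (x < (int64_t)INT32_MIN) {
        r = INT32_MIN;
        *sat = 1;
    } else if (x > (int64_t)INT32_MAX) {
        r = INT32_MAX;
        *sat = 1;
    } else {
        r = x;
    }
    return r;
}
#endif
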
void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    ppc_store_vscr(env, vscr);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    return ppc_get_vscr(env);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary.  */
    env->vscr_sat.u32[0] = 1;
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                           ppc_avr_t *b, ppc_avr_t *c)                  \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
                        ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            VARITHSAT_CASE(optype, op, cvt, element);                   \
        }                                                               \
        if (sat) {                                                      \
            vscr_sat->u32[0] = 1;                                       \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
            (a->element[i] - b->element[i]) :                           \
            (b->element[i] - a->element[i]);                            \
    }                                                                   \
}

/*
 * VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP

#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,              \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
{                                                                       \
    etype ones = (etype)-1;                                             \
    etype all = ones;                                                   \
    etype result, none = 0;                                             \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        if (cmpzero) {                                                  \
            result = ((a->element[i] == 0)                              \
                           || (b->element[i] == 0)                      \
                           || (a->element[i] != b->element[i]) ?        \
                           ones : 0x0);                                 \
        } else {                                                        \
            result = (a->element[i] != b->element[i]) ? ones : 0x0;     \
        }                                                               \
        r->element[i] = result;                                         \
        all &= result;                                                  \
        none |= result;                                                 \
    }                                                                   \
    if (record) {                                                       \
        env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);           \
    }                                                                   \
}

/*
 * VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            uint32_t result;                                            \
            FloatRelation rel =                                         \
                float32_compare_quiet(a->f32[i], b->f32[i],             \
                                      &env->vec_status);                \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        FloatRelation le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
                                                     &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f32[i]);
            FloatRelation ge_rel = float32_compare_quiet(a->f32[i], bneg,
                                                         &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, access, ofs)                                  \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
    {                                                                        \
        ppc_avr_t result;                                                    \
        int i, half = ARRAY_SIZE(r->element) / 2;                            \
                                                                             \
        for (i = 0; i < half; i++) {                                         \
            result.access(i * 2 + 0) = a->access(i + ofs);                   \
            result.access(i * 2 + 1) = b->access(i + ofs);                   \
        }                                                                    \
        *r = result;                                                         \
    }

#define VMRG(suffix, element, access)          \
    VMRG_DO(mrgl##suffix, element, access, half)   \
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
                                     (cast)b->mul_access(i);            \
        }                                                               \
    }

#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
                                     (cast)b->mul_access(i + 1);        \
        }                                                               \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast)       \
    VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast)  \
    VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
VMUL(sb, s8, VsrSB, VsrSH, int16_t)
VMUL(sh, s16, VsrSH, VsrSW, int32_t)
VMUL(sw, s32, VsrSW, VsrSD, int64_t)
VMUL(ub, u8, VsrB, VsrH, uint16_t)
VMUL(uh, u16, VsrH, VsrW, uint32_t)
VMUL(uw, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL

void helper_vmulhsw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < 4; i++) {
        r->s32[i] = (int32_t)(((int64_t)a->s32[i] * (int64_t)b->s32[i]) >> 32);
    }
}

void helper_vmulhuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < 4; i++) {
        r->u32[i] = (uint32_t)(((uint64_t)a->u32[i] *
                               (uint64_t)b->u32[i]) >> 32);
    }
}

void helper_vmulhsd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    uint64_t discard;

    muls64(&discard, &r->u64[0], a->s64[0], b->s64[0]);
    muls64(&discard, &r->u64[1], a->s64[1], b->s64[1]);
}

void helper_vmulhud(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    uint64_t discard;

    mulu64(&discard, &r->u64[0], a->u64[0], b->u64[0]);
    mulu64(&discard, &r->u64[1], a->u64[1], b->u64[1]);
}

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = s & 0xf;

        if (s & 0x10) {
            result.VsrB(i) = b->VsrB(index);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = 15 - (s & 0xf);

        if (s & 0x10) {
            result.VsrB(i) = a->VsrB(index);
        } else {
            result.VsrB(i) = b->VsrB(index);
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->VsrD(0) = perm;
    r->VsrD(1) = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

#define PMSUM(name, srcfld, trgfld, trgtyp)                   \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
{                                                             \
    int i, j;                                                 \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];    \
                                                              \
    VECTOR_FOR_INORDER_I(i, srcfld) {                         \
        prod[i] = 0;                                          \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
            if (a->srcfld[i] & (1ull << j)) {                 \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
            }                                                 \
        }                                                     \
    }                                                         \
                                                              \
    VECTOR_FOR_INORDER_I(i, trgfld) {                         \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];         \
    }                                                         \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.VsrD(0) = 0;
                    bshift.VsrD(1) = b->u64[i];
                } else {
                    bshift.VsrD(0) = b->u64[i] >> (64 - j);
                    bshift.VsrD(1) = b->u64[i] << j;
                }
                prod[i].VsrD(1) ^= bshift.VsrD(1);
                prod[i].VsrD(0) ^= bshift.VsrD(0);
            }
        }
    }

    r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
    r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
#endif
}


#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

1444void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1445{
1446    int i;
1447
1448    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1449        r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1450    }
1451}
1452
1453#define VRFI(suffix, rounding)                                  \
1454    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
1455                             ppc_avr_t *b)                      \
1456    {                                                           \
1457        int i;                                                  \
1458        float_status s = env->vec_status;                       \
1459                                                                \
1460        set_float_rounding_mode(rounding, &s);                  \
1461        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {              \
1462            r->f32[i] = float32_round_to_int (b->f32[i], &s);   \
1463        }                                                       \
1464    }
1465VRFI(n, float_round_nearest_even)
1466VRFI(m, float_round_down)
1467VRFI(p, float_round_up)
1468VRFI(z, float_round_to_zero)
1469#undef VRFI
1470
1471void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1472{
1473    int i;
1474
1475    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1476        float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1477
1478        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1479    }
1480}
1481
1482#define VRLMI(name, size, element, insert)                            \
1483void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)          \
1484{                                                                     \
1485    int i;                                                            \
1486    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
1487        uint##size##_t src1 = a->element[i];                          \
1488        uint##size##_t src2 = b->element[i];                          \
1489        uint##size##_t src3 = r->element[i];                          \
1490        uint##size##_t begin, end, shift, mask, rot_val;              \
1491                                                                      \
1492        shift = extract##size(src2, 0, 6);                            \
1493        end   = extract##size(src2, 8, 6);                            \
1494        begin = extract##size(src2, 16, 6);                           \
1495        rot_val = rol##size(src1, shift);                             \
1496        mask = mask_u##size(begin, end);                              \
1497        if (insert) {                                                 \
1498            r->element[i] = (rot_val & mask) | (src3 & ~mask);        \
1499        } else {                                                      \
1500            r->element[i] = (rot_val & mask);                         \
1501        }                                                             \
1502    }                                                                 \
1503}
1504
1505VRLMI(vrldmi, 64, u64, 1);
1506VRLMI(vrlwmi, 32, u32, 1);
1507VRLMI(vrldnm, 64, u64, 0);
1508VRLMI(vrlwnm, 32, u32, 0);
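/*
 * Field-layout sketch for the rotate-mask helpers above, shown for the
 * 32-bit forms (the 64-bit forms use the same bit offsets): each
 * element of b packs sh in bits 0-5, me in bits 8-13 and mb in bits
 * 16-21.  For example, b->u32[i] = 0x00100804 rotates the source left
 * by 4 and masks the rotated value with mask_u32(16, 8):
 *
 *     shift = extract32(0x00100804, 0, 6);    // 4
 *     end   = extract32(0x00100804, 8, 6);    // 8
 *     begin = extract32(0x00100804, 16, 6);   // 16
 */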
1509
1510void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1511                 ppc_avr_t *c)
1512{
1513    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1514    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1515}
1516
1517void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1518{
1519    int i;
1520
1521    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1522        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1523    }
1524}
1525
1526void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1527{
1528    int i;
1529
1530    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1531        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1532    }
1533}
1534
1535#define VEXTU_X_DO(name, size, left)                            \
1536target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
1537{                                                               \
1538    int index = (a & 0xf) * 8;                                  \
1539    if (left) {                                                 \
1540        index = 128 - index - size;                             \
1541    }                                                           \
1542    return int128_getlo(int128_rshift(b->s128, index)) &        \
1543        MAKE_64BIT_MASK(0, size);                               \
1544}
1545VEXTU_X_DO(vextublx,  8, 1)
1546VEXTU_X_DO(vextuhlx, 16, 1)
1547VEXTU_X_DO(vextuwlx, 32, 1)
1548VEXTU_X_DO(vextubrx,  8, 0)
1549VEXTU_X_DO(vextuhrx, 16, 0)
1550VEXTU_X_DO(vextuwrx, 32, 0)
1551#undef VEXTU_X_DO
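/*
 * Index sketch for the extract helpers above: RA selects a byte offset
 * into the 128-bit source, so `index` is a bit offset.  The
 * right-indexed forms shift the wanted element straight down to bit 0;
 * the left-indexed forms first mirror the offset (128 - index - size)
 * so that offset 0 names the most significant end.  E.g. vextublx
 * with a = 0 returns bits 127:120 of b->s128.
 */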
1552
1553void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1554{
1555    int i;
1556    unsigned int shift, bytes, size;
1557
1558    size = ARRAY_SIZE(r->u8);
1559    for (i = 0; i < size; i++) {
1560        shift = b->VsrB(i) & 0x7;             /* extract shift value */
1561        bytes = (a->VsrB(i) << 8) +           /* extract adjacent bytes */
1562            (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1563        r->VsrB(i) = (bytes << shift) >> 8;   /* shift and store result */
1564    }
1565}
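/*
 * A worked example of the byte pairing above (illustrative only): for
 * vslv, result byte i takes its shifted-in bits from source byte
 * i + 1, so the two bytes are first glued into a 16-bit quantity.
 * With a->VsrB(i) = 0x81, a->VsrB(i + 1) = 0xc0 and a shift of 1:
 *
 *     bytes = (0x81 << 8) + 0xc0;         // 0x81c0
 *     res   = (0x81c0 << 1) >> 8;         // 0x103, stored as byte 0x03
 */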
1566
1567void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1568{
1569    int i;
1570    unsigned int shift, bytes;
1571
1572    /*
1573     * Use reverse order, as the destination and source registers can be
1574     * the same. The register is modified in place, saving a temporary,
1575     * and reverse order ensures no computed result is fed back.
1576     */
1577    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1578        shift = b->VsrB(i) & 0x7;               /* extract shift value */
1579        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1580                                                /* extract adjacent bytes */
1581        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
1582    }
1583}
1584
1585void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1586{
1587    int sh = shift & 0xf;
1588    int i;
1589    ppc_avr_t result;
1590
1591    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1592        int index = sh + i;
1593        if (index > 0xf) {
1594            result.VsrB(i) = b->VsrB(index - 0x10);
1595        } else {
1596            result.VsrB(i) = a->VsrB(index);
1597        }
1598    }
1599    *r = result;
1600}
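/*
 * vsldoi sketch: the result is bytes sh .. sh + 15 of the 32-byte
 * concatenation a:b in element order.  With sh = 3, result byte 0 is
 * a->VsrB(3) and result byte 13 reaches index 0x10, which wraps into
 * b via index - 0x10, giving b->VsrB(0).
 */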
1601
1602void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1603{
1604    int sh = (b->VsrB(0xf) >> 3) & 0xf;
1605
1606#if defined(HOST_WORDS_BIGENDIAN)
1607    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1608    memset(&r->u8[16 - sh], 0, sh);
1609#else
1610    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1611    memset(&r->u8[0], 0, sh);
1612#endif
1613}
1614
1615#if defined(HOST_WORDS_BIGENDIAN)
1616#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[IDX])
1617#else
1618#define ELEM_ADDR(VEC, IDX, SIZE) (&(VEC)->u8[15 - (IDX)] - (SIZE) + 1)
1619#endif
1620
1621#define VINSX(SUFFIX, TYPE) \
1622void glue(glue(helper_VINS, SUFFIX), LX)(CPUPPCState *env, ppc_avr_t *t,       \
1623                                         uint64_t val, target_ulong index)     \
1624{                                                                              \
1625    const int maxidx = ARRAY_SIZE(t->u8) - sizeof(TYPE);                       \
1626    target_long idx = index;                                                   \
1627                                                                               \
1628    if (idx < 0 || idx > maxidx) {                                             \
1629        idx = idx < 0 ? sizeof(TYPE) - idx : idx;                              \
1630        qemu_log_mask(LOG_GUEST_ERROR,                                         \
1631            "Invalid index for Vector Insert Element after 0x" TARGET_FMT_lx   \
1632            ", RA = " TARGET_FMT_ld " > %d\n", env->nip, idx, maxidx);         \
1633    } else {                                                                   \
1634        TYPE src = val;                                                        \
1635        memcpy(ELEM_ADDR(t, idx, sizeof(TYPE)), &src, sizeof(TYPE));           \
1636    }                                                                          \
1637}
1638VINSX(B, uint8_t)
1639VINSX(H, uint16_t)
1640VINSX(W, uint32_t)
1641VINSX(D, uint64_t)
1642#undef ELEM_ADDR
1643#undef VINSX
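/*
 * ELEM_ADDR sketch: idx counts bytes from the most significant end of
 * the vector, so on a little-endian host the first byte of a SIZE-byte
 * element lives at &u8[15 - idx] - SIZE + 1 and the copy is mirrored.
 * E.g. the word (W) form with idx = 4 writes host bytes 8..11.
 */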
1644#if defined(HOST_WORDS_BIGENDIAN)
1645#define VEXTDVLX(NAME, SIZE) \
1646void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1647                   target_ulong index)                                         \
1648{                                                                              \
1649    const target_long idx = index;                                             \
1650    ppc_avr_t tmp[2] = { *a, *b };                                             \
1651    memset(t, 0, sizeof(*t));                                                  \
1652    if (idx >= 0 && idx + SIZE <= sizeof(tmp)) {                               \
1653        memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2 - SIZE], (void *)tmp + idx, SIZE); \
1654    } else {                                                                   \
1655        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x"  \
1656                      TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n",         \
1657                      env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE);        \
1658    }                                                                          \
1659}
1660#else
1661#define VEXTDVLX(NAME, SIZE) \
1662void helper_##NAME(CPUPPCState *env, ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, \
1663                   target_ulong index)                                         \
1664{                                                                              \
1665    const target_long idx = index;                                             \
1666    ppc_avr_t tmp[2] = { *b, *a };                                             \
1667    memset(t, 0, sizeof(*t));                                                  \
1668    if (idx >= 0 && idx + SIZE <= sizeof(tmp)) {                               \
1669        memcpy(&t->u8[ARRAY_SIZE(t->u8) / 2],                                  \
1670               (void *)tmp + sizeof(tmp) - SIZE - idx, SIZE);                  \
1671    } else {                                                                   \
1672        qemu_log_mask(LOG_GUEST_ERROR, "Invalid index for " #NAME " after 0x"  \
1673                      TARGET_FMT_lx ", RC = " TARGET_FMT_ld " > %d\n",         \
1674                      env->nip, idx < 0 ? SIZE - idx : idx, 32 - SIZE);        \
1675    }                                                                          \
1676}
1677#endif
1678VEXTDVLX(VEXTDUBVLX, 1)
1679VEXTDVLX(VEXTDUHVLX, 2)
1680VEXTDVLX(VEXTDUWVLX, 4)
1681VEXTDVLX(VEXTDDVLX, 8)
1682#undef VEXTDVLX
1683#if defined(HOST_WORDS_BIGENDIAN)
1684#define VEXTRACT(suffix, element)                                            \
1685    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1686    {                                                                        \
1687        uint32_t es = sizeof(r->element[0]);                                 \
1688        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
1689        memset(&r->u8[8], 0, 8);                                             \
1690        memset(&r->u8[0], 0, 8 - es);                                        \
1691    }
1692#else
1693#define VEXTRACT(suffix, element)                                            \
1694    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1695    {                                                                        \
1696        uint32_t es = sizeof(r->element[0]);                                 \
1697        uint32_t s = (16 - index) - es;                                      \
1698        memmove(&r->u8[8], &b->u8[s], es);                                   \
1699        memset(&r->u8[0], 0, 8);                                             \
1700        memset(&r->u8[8 + es], 0, 8 - es);                                   \
1701    }
1702#endif
1703VEXTRACT(ub, u8)
1704VEXTRACT(uh, u16)
1705VEXTRACT(uw, u32)
1706VEXTRACT(d, u64)
1707#undef VEXTRACT
1708
1709void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1710                        ppc_vsr_t *xb, uint32_t index)
1711{
1712    ppc_vsr_t t = { };
1713    size_t es = sizeof(uint32_t);
1714    uint32_t ext_index;
1715    int i;
1716
1717    ext_index = index;
1718    for (i = 0; i < es; i++, ext_index++) {
1719        t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1720    }
1721
1722    *xt = t;
1723}
1724
1725void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1726                      ppc_vsr_t *xb, uint32_t index)
1727{
1728    ppc_vsr_t t = *xt;
1729    size_t es = sizeof(uint32_t);
1730    int ins_index, i = 0;
1731
1732    ins_index = index;
1733    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1734        t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1735    }
1736
1737    *xt = t;
1738}
1739
1740#define XXBLEND(name, sz) \
1741void glue(helper_XXBLENDV, name)(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b,  \
1742                                 ppc_avr_t *c, uint32_t desc)               \
1743{                                                                           \
1744    for (int i = 0; i < ARRAY_SIZE(t->glue(u, sz)); i++) {                  \
1745        t->glue(u, sz)[i] = (c->glue(s, sz)[i] >> (sz - 1)) ?               \
1746            b->glue(u, sz)[i] : a->glue(u, sz)[i];                          \
1747    }                                                                       \
1748}
1749XXBLEND(B, 8)
1750XXBLEND(H, 16)
1751XXBLEND(W, 32)
1752XXBLEND(D, 64)
1753#undef XXBLEND
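/*
 * Blend sketch: for each element, the sign bit of c selects the
 * source, i.e. t[i] = c[i] < 0 ? b[i] : a[i]; the arithmetic shift by
 * (sz - 1) reduces each signed element to 0 or -1 before the test.
 */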
1754
1755#define VEXT_SIGNED(name, element, cast)                            \
1756void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
1757{                                                                   \
1758    int i;                                                          \
1759    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1760        r->element[i] = (cast)b->element[i];                        \
1761    }                                                               \
1762}
1763VEXT_SIGNED(vextsb2w, s32, int8_t)
1764VEXT_SIGNED(vextsb2d, s64, int8_t)
1765VEXT_SIGNED(vextsh2w, s32, int16_t)
1766VEXT_SIGNED(vextsh2d, s64, int16_t)
1767VEXT_SIGNED(vextsw2d, s64, int32_t)
1768#undef VEXT_SIGNED
1769
1770#define VNEG(name, element)                                         \
1771void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
1772{                                                                   \
1773    int i;                                                          \
1774    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1775        r->element[i] = -b->element[i];                             \
1776    }                                                               \
1777}
1778VNEG(vnegw, s32)
1779VNEG(vnegd, s64)
1780#undef VNEG
1781
1782void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1783{
1784    int sh = (b->VsrB(0xf) >> 3) & 0xf;
1785
1786#if defined(HOST_WORDS_BIGENDIAN)
1787    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1788    memset(&r->u8[0], 0, sh);
1789#else
1790    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1791    memset(&r->u8[16 - sh], 0, sh);
1792#endif
1793}
1794
1795void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1796{
1797    int i;
1798
1799    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1800        r->u32[i] = a->u32[i] >= b->u32[i];
1801    }
1802}
1803
1804void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1805{
1806    int64_t t;
1807    int i, upper;
1808    ppc_avr_t result;
1809    int sat = 0;
1810
1811    upper = ARRAY_SIZE(r->s32) - 1;
1812    t = (int64_t)b->VsrSW(upper);
1813    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1814        t += a->VsrSW(i);
1815        result.VsrSW(i) = 0;
1816    }
1817    result.VsrSW(upper) = cvtsdsw(t, &sat);
1818    *r = result;
1819
1820    if (sat) {
1821        set_vscr_sat(env);
1822    }
1823}
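/*
 * Sketch: vsumsws accumulates the four words of a plus the last word
 * of b in 64-bit precision, then cvtsdsw() saturates the total back
 * to 32 bits.  E.g. five summands of 0x7fffffff overflow INT32_MAX,
 * so the result saturates to 0x7fffffff and VSCR[SAT] is set.
 */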
1824
1825void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1826{
1827    int i, j, upper;
1828    ppc_avr_t result;
1829    int sat = 0;
1830
1831    upper = 1;
1832    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1833        int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1834
1835        result.VsrD(i) = 0;
1836        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1837            t += a->VsrSW(2 * i + j);
1838        }
1839        result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1840    }
1841
1842    *r = result;
1843    if (sat) {
1844        set_vscr_sat(env);
1845    }
1846}
1847
1848void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1849{
1850    int i, j;
1851    int sat = 0;
1852
1853    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1854        int64_t t = (int64_t)b->s32[i];
1855
1856        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1857            t += a->s8[4 * i + j];
1858        }
1859        r->s32[i] = cvtsdsw(t, &sat);
1860    }
1861
1862    if (sat) {
1863        set_vscr_sat(env);
1864    }
1865}
1866
1867void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1868{
1869    int sat = 0;
1870    int i;
1871
1872    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1873        int64_t t = (int64_t)b->s32[i];
1874
1875        t += a->s16[2 * i] + a->s16[2 * i + 1];
1876        r->s32[i] = cvtsdsw(t, &sat);
1877    }
1878
1879    if (sat) {
1880        set_vscr_sat(env);
1881    }
1882}
1883
1884void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1885{
1886    int i, j;
1887    int sat = 0;
1888
1889    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1890        uint64_t t = (uint64_t)b->u32[i];
1891
1892        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1893            t += a->u8[4 * i + j];
1894        }
1895        r->u32[i] = cvtuduw(t, &sat);
1896    }
1897
1898    if (sat) {
1899        set_vscr_sat(env);
1900    }
1901}
1902
1903#if defined(HOST_WORDS_BIGENDIAN)
1904#define UPKHI 1
1905#define UPKLO 0
1906#else
1907#define UPKHI 0
1908#define UPKLO 1
1909#endif
1910#define VUPKPX(suffix, hi)                                              \
1911    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
1912    {                                                                   \
1913        int i;                                                          \
1914        ppc_avr_t result;                                               \
1915                                                                        \
1916        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
1917            uint16_t e = b->u16[hi ? i : i + 4];                        \
1918            uint8_t a = (e >> 15) ? 0xff : 0;                           \
1919            uint8_t r = (e >> 10) & 0x1f;                               \
1920            uint8_t g = (e >> 5) & 0x1f;                                \
1921            uint8_t b = e & 0x1f;                                       \
1922                                                                        \
1923            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
1924        }                                                               \
1925        *r = result;                                                    \
1926    }
1927VUPKPX(lpx, UPKLO)
1928VUPKPX(hpx, UPKHI)
1929#undef VUPKPX
1930
1931#define VUPK(suffix, unpacked, packee, hi)                              \
1932    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
1933    {                                                                   \
1934        int i;                                                          \
1935        ppc_avr_t result;                                               \
1936                                                                        \
1937        if (hi) {                                                       \
1938            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
1939                result.unpacked[i] = b->packee[i];                      \
1940            }                                                           \
1941        } else {                                                        \
1942            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1943                 i++) {                                                 \
1944                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1945            }                                                           \
1946        }                                                               \
1947        *r = result;                                                    \
1948    }
1949VUPK(hsb, s16, s8, UPKHI)
1950VUPK(hsh, s32, s16, UPKHI)
1951VUPK(hsw, s64, s32, UPKHI)
1952VUPK(lsb, s16, s8, UPKLO)
1953VUPK(lsh, s32, s16, UPKLO)
1954VUPK(lsw, s64, s32, UPKLO)
1955#undef VUPK
1956#undef UPKHI
1957#undef UPKLO
1958
1959#define VGENERIC_DO(name, element)                                      \
1960    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
1961    {                                                                   \
1962        int i;                                                          \
1963                                                                        \
1964        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1965            r->element[i] = name(b->element[i]);                        \
1966        }                                                               \
1967    }
1968
1969#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1970#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1971
1972VGENERIC_DO(clzb, u8)
1973VGENERIC_DO(clzh, u16)
1974
1975#undef clzb
1976#undef clzh
1977
1978#define ctzb(v) ((v) ? ctz32(v) : 8)
1979#define ctzh(v) ((v) ? ctz32(v) : 16)
1980#define ctzw(v) ctz32((v))
1981#define ctzd(v) ctz64((v))
1982
1983VGENERIC_DO(ctzb, u8)
1984VGENERIC_DO(ctzh, u16)
1985VGENERIC_DO(ctzw, u32)
1986VGENERIC_DO(ctzd, u64)
1987
1988#undef ctzb
1989#undef ctzh
1990#undef ctzw
1991#undef ctzd
1992
1993#define popcntb(v) ctpop8(v)
1994#define popcnth(v) ctpop16(v)
1995#define popcntw(v) ctpop32(v)
1996#define popcntd(v) ctpop64(v)
1997
1998VGENERIC_DO(popcntb, u8)
1999VGENERIC_DO(popcnth, u16)
2000VGENERIC_DO(popcntw, u32)
2001VGENERIC_DO(popcntd, u64)
2002
2003#undef popcntb
2004#undef popcnth
2005#undef popcntw
2006#undef popcntd
2007
2008#undef VGENERIC_DO
2009
2010#if defined(HOST_WORDS_BIGENDIAN)
2011#define QW_ONE { .u64 = { 0, 1 } }
2012#else
2013#define QW_ONE { .u64 = { 1, 0 } }
2014#endif
2015
2016#ifndef CONFIG_INT128
2017
2018static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2019{
2020    t->u64[0] = ~a.u64[0];
2021    t->u64[1] = ~a.u64[1];
2022}
2023
2024static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2025{
2026    if (a.VsrD(0) < b.VsrD(0)) {
2027        return -1;
2028    } else if (a.VsrD(0) > b.VsrD(0)) {
2029        return 1;
2030    } else if (a.VsrD(1) < b.VsrD(1)) {
2031        return -1;
2032    } else if (a.VsrD(1) > b.VsrD(1)) {
2033        return 1;
2034    } else {
2035        return 0;
2036    }
2037}
2038
2039static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2040{
2041    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2042    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2043                     (~a.VsrD(1) < b.VsrD(1));
2044}
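/*
 * Carry idiom used above and in the CONFIG_INT128 paths below: the
 * addition x + y carries out of 64 bits iff y > ~x, because
 * ~x == UINT64_MAX - x.  E.g. with x = UINT64_MAX - 1, ~x == 1, so
 * only y >= 2 produces a carry.
 */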
2045
2046static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2047{
2048    ppc_avr_t not_a;
2049    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2050    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2051                     (~a.VsrD(1) < b.VsrD(1));
2052    avr_qw_not(&not_a, a);
2053    return avr_qw_cmpu(not_a, b) < 0;
2054}
2055
2056#endif
2057
2058void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2059{
2060#ifdef CONFIG_INT128
2061    r->u128 = a->u128 + b->u128;
2062#else
2063    avr_qw_add(r, *a, *b);
2064#endif
2065}
2066
2067void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2068{
2069#ifdef CONFIG_INT128
2070    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2071#else
2072
2073    if (c->VsrD(1) & 1) {
2074        ppc_avr_t tmp;
2075
2076        tmp.VsrD(0) = 0;
2077        tmp.VsrD(1) = c->VsrD(1) & 1;
2078        avr_qw_add(&tmp, *a, tmp);
2079        avr_qw_add(r, tmp, *b);
2080    } else {
2081        avr_qw_add(r, *a, *b);
2082    }
2083#endif
2084}
2085
2086void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2087{
2088#ifdef CONFIG_INT128
2089    r->u128 = (~a->u128 < b->u128);
2090#else
2091    ppc_avr_t not_a;
2092
2093    avr_qw_not(&not_a, *a);
2094
2095    r->VsrD(0) = 0;
2096    r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
2097#endif
2098}
2099
2100void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2101{
2102#ifdef CONFIG_INT128
2103    int carry_out = (~a->u128 < b->u128);
2104    if (!carry_out && (c->u128 & 1)) {
2105        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2106                    ((a->u128 != 0) || (b->u128 != 0));
2107    }
2108    r->u128 = carry_out;
2109#else
2110
2111    int carry_in = c->VsrD(1) & 1;
2112    int carry_out = 0;
2113    ppc_avr_t tmp;
2114
2115    carry_out = avr_qw_addc(&tmp, *a, *b);
2116
2117    if (!carry_out && carry_in) {
2118        ppc_avr_t one = QW_ONE;
2119        carry_out = avr_qw_addc(&tmp, tmp, one);
2120    }
2121    r->VsrD(0) = 0;
2122    r->VsrD(1) = carry_out;
2123#endif
2124}
2125
2126void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2127{
2128#ifdef CONFIG_INT128
2129    r->u128 = a->u128 - b->u128;
2130#else
2131    ppc_avr_t tmp;
2132    ppc_avr_t one = QW_ONE;
2133
2134    avr_qw_not(&tmp, *b);
2135    avr_qw_add(&tmp, *a, tmp);
2136    avr_qw_add(r, tmp, one);
2137#endif
2138}
2139
2140void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2141{
2142#ifdef CONFIG_INT128
2143    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2144#else
2145    ppc_avr_t tmp, sum;
2146
2147    avr_qw_not(&tmp, *b);
2148    avr_qw_add(&sum, *a, tmp);
2149
2150    tmp.VsrD(0) = 0;
2151    tmp.VsrD(1) = c->VsrD(1) & 1;
2152    avr_qw_add(r, sum, tmp);
2153#endif
2154}
2155
2156void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2157{
2158#ifdef CONFIG_INT128
2159    r->u128 = (~a->u128 < ~b->u128) ||
2160                 (a->u128 + ~b->u128 == (__uint128_t)-1);
2161#else
2162    int carry = (avr_qw_cmpu(*a, *b) > 0);
2163    if (!carry) {
2164        ppc_avr_t tmp;
2165        avr_qw_not(&tmp, *b);
2166        avr_qw_add(&tmp, *a, tmp);
2167        carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2168    }
2169    r->VsrD(0) = 0;
2170    r->VsrD(1) = carry;
2171#endif
2172}
2173
2174void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2175{
2176#ifdef CONFIG_INT128
2177    r->u128 =
2178        (~a->u128 < ~b->u128) ||
2179        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2180#else
2181    int carry_in = c->VsrD(1) & 1;
2182    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2183    if (!carry_out && carry_in) {
2184        ppc_avr_t tmp;
2185        avr_qw_not(&tmp, *b);
2186        avr_qw_add(&tmp, *a, tmp);
2187        carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2188    }
2189
2190    r->VsrD(0) = 0;
2191    r->VsrD(1) = carry_out;
2192#endif
2193}
2194
2195#define BCD_PLUS_PREF_1 0xC
2196#define BCD_PLUS_PREF_2 0xF
2197#define BCD_PLUS_ALT_1  0xA
2198#define BCD_NEG_PREF    0xD
2199#define BCD_NEG_ALT     0xB
2200#define BCD_PLUS_ALT_2  0xE
2201#define NATIONAL_PLUS   0x2B
2202#define NATIONAL_NEG    0x2D
2203
2204#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
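/*
 * Packed-BCD layout sketch: a value holds 31 decimal digits plus a
 * sign nibble.  Digit 0 is the sign, digits 1..31 grow toward the
 * most significant end, two digits per byte (odd digits in the high
 * nibble).  BCD_DIG_BYTE maps a digit index to the VsrB() byte that
 * holds it: digits 0 and 1 share byte 15, digits 30 and 31 byte 0.
 */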
2205
2206static int bcd_get_sgn(ppc_avr_t *bcd)
2207{
2208    switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2209    case BCD_PLUS_PREF_1:
2210    case BCD_PLUS_PREF_2:
2211    case BCD_PLUS_ALT_1:
2212    case BCD_PLUS_ALT_2:
2213    {
2214        return 1;
2215    }
2216
2217    case BCD_NEG_PREF:
2218    case BCD_NEG_ALT:
2219    {
2220        return -1;
2221    }
2222
2223    default:
2224    {
2225        return 0;
2226    }
2227    }
2228}
2229
2230static int bcd_preferred_sgn(int sgn, int ps)
2231{
2232    if (sgn >= 0) {
2233        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2234    } else {
2235        return BCD_NEG_PREF;
2236    }
2237}
2238
2239static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2240{
2241    uint8_t result;
2242    if (n & 1) {
2243        result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2244    } else {
2245        result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2246    }
2247
2248    if (unlikely(result > 9)) {
2249        *invalid = true;
2250    }
2251    return result;
2252}
2253
2254static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2255{
2256    if (n & 1) {
2257        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2258        bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2259    } else {
2260        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2261        bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2262    }
2263}
2264
2265static bool bcd_is_valid(ppc_avr_t *bcd)
2266{
2267    int i;
2268    int invalid = 0;
2269
2270    if (bcd_get_sgn(bcd) == 0) {
2271        return false;
2272    }
2273
2274    for (i = 1; i < 32; i++) {
2275        bcd_get_digit(bcd, i, &invalid);
2276        if (unlikely(invalid)) {
2277            return false;
2278        }
2279    }
2280    return true;
2281}
2282
2283static int bcd_cmp_zero(ppc_avr_t *bcd)
2284{
2285    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2286        return CRF_EQ;
2287    } else {
2288        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2289    }
2290}
2291
2292static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2293{
2294    return reg->VsrH(7 - n);
2295}
2296
2297static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2298{
2299    reg->VsrH(7 - n) = val;
2300}
2301
2302static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2303{
2304    int i;
2305    int invalid = 0;
2306    for (i = 31; i > 0; i--) {
2307        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2308        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2309        if (unlikely(invalid)) {
2310            return 0; /* doesn't matter */
2311        } else if (dig_a > dig_b) {
2312            return 1;
2313        } else if (dig_a < dig_b) {
2314            return -1;
2315        }
2316    }
2317
2318    return 0;
2319}
2320
2321static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2322                       int *overflow)
2323{
2324    int carry = 0;
2325    int i;
2326    int is_zero = 1;
2327
2328    for (i = 1; i <= 31; i++) {
2329        uint8_t digit = bcd_get_digit(a, i, invalid) +
2330                        bcd_get_digit(b, i, invalid) + carry;
2331        is_zero &= (digit == 0);
2332        if (digit > 9) {
2333            carry = 1;
2334            digit -= 10;
2335        } else {
2336            carry = 0;
2337        }
2338
2339        bcd_put_digit(t, digit, i);
2340    }
2341
2342    *overflow = carry;
2343    return is_zero;
2344}
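/*
 * Worked example (sketch): adding magnitudes digit by digit, 5 + 7
 * gives 12 > 9, so 2 is stored and the carry ripples into the next
 * digit; a carry still pending after digit 31 is reported back as
 * decimal overflow.
 */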
2345
2346static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2347                       int *overflow)
2348{
2349    int carry = 0;
2350    int i;
2351
2352    for (i = 1; i <= 31; i++) {
2353        uint8_t digit = bcd_get_digit(a, i, invalid) -
2354                        bcd_get_digit(b, i, invalid) + carry;
2355        if (digit & 0x80) {
2356            carry = -1;
2357            digit += 10;
2358        } else {
2359            carry = 0;
2360        }
2361
2362        bcd_put_digit(t, digit, i);
2363    }
2364
2365    *overflow = carry;
2366}
2367
2368uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2369{
2370
2371    int sgna = bcd_get_sgn(a);
2372    int sgnb = bcd_get_sgn(b);
2373    int invalid = (sgna == 0) || (sgnb == 0);
2374    int overflow = 0;
2375    int zero = 0;
2376    uint32_t cr = 0;
2377    ppc_avr_t result = { .u64 = { 0, 0 } };
2378
2379    if (!invalid) {
2380        if (sgna == sgnb) {
2381            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2382            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2383            cr = (sgna > 0) ? CRF_GT : CRF_LT;
2384        } else {
2385            int magnitude = bcd_cmp_mag(a, b);
2386            if (magnitude > 0) {
2387                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2388                bcd_sub_mag(&result, a, b, &invalid, &overflow);
2389                cr = (sgna > 0) ? CRF_GT : CRF_LT;
2390            } else if (magnitude < 0) {
2391                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2392                bcd_sub_mag(&result, b, a, &invalid, &overflow);
2393                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2394            } else {
2395                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2396                cr = CRF_EQ;
2397            }
2398        }
2399    }
2400
2401    if (unlikely(invalid)) {
2402        result.VsrD(0) = result.VsrD(1) = -1;
2403        cr = CRF_SO;
2404    } else if (overflow) {
2405        cr |= CRF_SO;
2406    } else if (zero) {
2407        cr |= CRF_EQ;
2408    }
2409
2410    *r = result;
2411
2412    return cr;
2413}
2414
2415uint32_t helper_bcdsub(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2416{
2417    ppc_avr_t bcopy = *b;
2418    int sgnb = bcd_get_sgn(b);
2419    if (sgnb < 0) {
2420        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2421    } else if (sgnb > 0) {
2422        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2423    }
2424    /* else invalid ... defer to bcdadd code for proper handling */
2425
2426    return helper_bcdadd(r, a, &bcopy, ps);
2427}
2428
2429uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2430{
2431    int i;
2432    int cr = 0;
2433    uint16_t national = 0;
2434    uint16_t sgnb = get_national_digit(b, 0);
2435    ppc_avr_t ret = { .u64 = { 0, 0 } };
2436    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2437
2438    for (i = 1; i < 8; i++) {
2439        national = get_national_digit(b, i);
2440        if (unlikely(national < 0x30 || national > 0x39)) {
2441            invalid = 1;
2442            break;
2443        }
2444
2445        bcd_put_digit(&ret, national & 0xf, i);
2446    }
2447
2448    if (sgnb == NATIONAL_PLUS) {
2449        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2450    } else {
2451        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2452    }
2453
2454    cr = bcd_cmp_zero(&ret);
2455
2456    if (unlikely(invalid)) {
2457        cr = CRF_SO;
2458    }
2459
2460    *r = ret;
2461
2462    return cr;
2463}
2464
2465uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2466{
2467    int i;
2468    int cr = 0;
2469    int sgnb = bcd_get_sgn(b);
2470    int invalid = (sgnb == 0);
2471    ppc_avr_t ret = { .u64 = { 0, 0 } };
2472
2473    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2474
2475    for (i = 1; i < 8; i++) {
2476        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2477
2478        if (unlikely(invalid)) {
2479            break;
2480        }
2481    }
2482    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2483
2484    cr = bcd_cmp_zero(b);
2485
2486    if (ox_flag) {
2487        cr |= CRF_SO;
2488    }
2489
2490    if (unlikely(invalid)) {
2491        cr = CRF_SO;
2492    }
2493
2494    *r = ret;
2495
2496    return cr;
2497}
2498
2499uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2500{
2501    int i;
2502    int cr = 0;
2503    int invalid = 0;
2504    int zone_digit = 0;
2505    int zone_lead = ps ? 0xF : 0x3;
2506    int digit = 0;
2507    ppc_avr_t ret = { .u64 = { 0, 0 } };
2508    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2509
2510    if (unlikely((sgnb < 0xA) && ps)) {
2511        invalid = 1;
2512    }
2513
2514    for (i = 0; i < 16; i++) {
2515        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2516        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2517        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2518            invalid = 1;
2519            break;
2520        }
2521
2522        bcd_put_digit(&ret, digit, i + 1);
2523    }
2524
2525    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2526            (!ps && (sgnb & 0x4))) {
2527        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2528    } else {
2529        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2530    }
2531
2532    cr = bcd_cmp_zero(&ret);
2533
2534    if (unlikely(invalid)) {
2535        cr = CRF_SO;
2536    }
2537
2538    *r = ret;
2539
2540    return cr;
2541}
2542
2543uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2544{
2545    int i;
2546    int cr = 0;
2547    uint8_t digit = 0;
2548    int sgnb = bcd_get_sgn(b);
2549    int zone_lead = (ps) ? 0xF0 : 0x30;
2550    int invalid = (sgnb == 0);
2551    ppc_avr_t ret = { .u64 = { 0, 0 } };
2552
2553    int ox_flag = ((b->VsrD(0) >> 4) != 0);
2554
2555    for (i = 0; i < 16; i++) {
2556        digit = bcd_get_digit(b, i + 1, &invalid);
2557
2558        if (unlikely(invalid)) {
2559            break;
2560        }
2561
2562        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2563    }
2564
2565    if (ps) {
2566        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2567    } else {
2568        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2569    }
2570
2571    cr = bcd_cmp_zero(b);
2572
2573    if (ox_flag) {
2574        cr |= CRF_SO;
2575    }
2576
2577    if (unlikely(invalid)) {
2578        cr = CRF_SO;
2579    }
2580
2581    *r = ret;
2582
2583    return cr;
2584}
2585
2586/**
2587 * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
2588 *
2589 * Returns:
2590 * > 0 if ahi|alo > bhi|blo,
2591 * 0 if ahi|alo == bhi|blo,
2592 * < 0 if ahi|alo < bhi|blo
2593 */
2594static inline int ucmp128(uint64_t alo, uint64_t ahi,
2595                          uint64_t blo, uint64_t bhi)
2596{
2597    return (ahi == bhi) ?
2598        (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
2599        (ahi > bhi ? 1 : -1);
2600}
2601
2602uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2603{
2604    int i;
2605    int cr;
2606    uint64_t lo_value;
2607    uint64_t hi_value;
2608    uint64_t rem;
2609    ppc_avr_t ret = { .u64 = { 0, 0 } };
2610
2611    if (b->VsrSD(0) < 0) {
2612        lo_value = -b->VsrSD(1);
2613        hi_value = ~b->VsrD(0) + !lo_value;
2614        bcd_put_digit(&ret, 0xD, 0);
2615
2616        cr = CRF_LT;
2617    } else {
2618        lo_value = b->VsrD(1);
2619        hi_value = b->VsrD(0);
2620        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2621
2622        if (hi_value == 0 && lo_value == 0) {
2623            cr = CRF_EQ;
2624        } else {
2625            cr = CRF_GT;
2626        }
2627    }
2628
2629    /*
2630     * Check src limits: abs(src) <= 10^31 - 1
2631     *
2632     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
2633     */
2634    if (ucmp128(lo_value, hi_value,
2635                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
2636        cr |= CRF_SO;
2637
2638        /*
2639         * According to the ISA, if src wouldn't fit in the destination
2640         * register, the result is undefined.
2641         * In that case, we leave r unchanged.
2642         */
2643    } else {
2644        rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
2645
2646        for (i = 1; i < 16; rem /= 10, i++) {
2647            bcd_put_digit(&ret, rem % 10, i);
2648        }
2649
2650        for (; i < 32; lo_value /= 10, i++) {
2651            bcd_put_digit(&ret, lo_value % 10, i);
2652        }
2653
2654        *r = ret;
2655    }
2656
2657    return cr;
2658}
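/*
 * Conversion sketch for the in-range path above: a single divu128()
 * by 10^15 splits the 128-bit magnitude so that `rem` supplies
 * decimal digits 1..15 while the quotient, which now fits in lo_value
 * alone (10^31 / 10^15 < 2^64), supplies digits 16..31.
 */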
2659
2660uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2661{
2662    uint8_t i;
2663    int cr;
2664    uint64_t carry;
2665    uint64_t unused;
2666    uint64_t lo_value;
2667    uint64_t hi_value = 0;
2668    int sgnb = bcd_get_sgn(b);
2669    int invalid = (sgnb == 0);
2670
2671    lo_value = bcd_get_digit(b, 31, &invalid);
2672    for (i = 30; i > 0; i--) {
2673        mulu64(&lo_value, &carry, lo_value, 10ULL);
2674        mulu64(&hi_value, &unused, hi_value, 10ULL);
2675        lo_value += bcd_get_digit(b, i, &invalid);
2676        hi_value += carry;
2677
2678        if (unlikely(invalid)) {
2679            break;
2680        }
2681    }
2682
2683    if (sgnb == -1) {
2684        r->VsrSD(1) = -lo_value;
2685        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2686    } else {
2687        r->VsrSD(1) = lo_value;
2688        r->VsrSD(0) = hi_value;
2689    }
2690
2691    cr = bcd_cmp_zero(b);
2692
2693    if (unlikely(invalid)) {
2694        cr = CRF_SO;
2695    }
2696
2697    return cr;
2698}
2699
2700uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2701{
2702    int i;
2703    int invalid = 0;
2704
2705    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2706        return CRF_SO;
2707    }
2708
2709    *r = *a;
2710    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2711
2712    for (i = 1; i < 32; i++) {
2713        bcd_get_digit(a, i, &invalid);
2714        bcd_get_digit(b, i, &invalid);
2715        if (unlikely(invalid)) {
2716            return CRF_SO;
2717        }
2718    }
2719
2720    return bcd_cmp_zero(r);
2721}
2722
2723uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2724{
2725    int sgnb = bcd_get_sgn(b);
2726
2727    *r = *b;
2728    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2729
2730    if (bcd_is_valid(b) == false) {
2731        return CRF_SO;
2732    }
2733
2734    return bcd_cmp_zero(r);
2735}
2736
2737uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2738{
2739    int cr;
2740    int i = a->VsrSB(7);
2741    bool ox_flag = false;
2742    int sgnb = bcd_get_sgn(b);
2743    ppc_avr_t ret = *b;
2744    ret.VsrD(1) &= ~0xf;
2745
2746    if (bcd_is_valid(b) == false) {
2747        return CRF_SO;
2748    }
2749
2750    if (unlikely(i > 31)) {
2751        i = 31;
2752    } else if (unlikely(i < -31)) {
2753        i = -31;
2754    }
2755
2756    if (i > 0) {
2757        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2758    } else {
2759        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2760    }
2761    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2762
2763    *r = ret;
2764
2765    cr = bcd_cmp_zero(r);
2766    if (ox_flag) {
2767        cr |= CRF_SO;
2768    }
2769
2770    return cr;
2771}
2772
2773uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2774{
2775    int cr;
2776    int i;
2777    int invalid = 0;
2778    bool ox_flag = false;
2779    ppc_avr_t ret = *b;
2780
2781    for (i = 0; i < 32; i++) {
2782        bcd_get_digit(b, i, &invalid);
2783
2784        if (unlikely(invalid)) {
2785            return CRF_SO;
2786        }
2787    }
2788
2789    i = a->VsrSB(7);
2790    if (i >= 32) {
2791        ox_flag = true;
2792        ret.VsrD(1) = ret.VsrD(0) = 0;
2793    } else if (i <= -32) {
2794        ret.VsrD(1) = ret.VsrD(0) = 0;
2795    } else if (i > 0) {
2796        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2797    } else {
2798        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2799    }
2800    *r = ret;
2801
2802    cr = bcd_cmp_zero(r);
2803    if (ox_flag) {
2804        cr |= CRF_SO;
2805    }
2806
2807    return cr;
2808}
2809
2810uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2811{
2812    int cr;
2813    int unused = 0;
2814    int invalid = 0;
2815    bool ox_flag = false;
2816    int sgnb = bcd_get_sgn(b);
2817    ppc_avr_t ret = *b;
2818    ret.VsrD(1) &= ~0xf;
2819
2820    int i = a->VsrSB(7);
2821    ppc_avr_t bcd_one;
2822
2823    bcd_one.VsrD(0) = 0;
2824    bcd_one.VsrD(1) = 0x10;
2825
2826    if (bcd_is_valid(b) == false) {
2827        return CRF_SO;
2828    }
2829
2830    if (unlikely(i > 31)) {
2831        i = 31;
2832    } else if (unlikely(i < -31)) {
2833        i = -31;
2834    }
2835
2836    if (i > 0) {
2837        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2838    } else {
2839        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2840
2841        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2842            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2843        }
2844    }
2845    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2846
2847    cr = bcd_cmp_zero(&ret);
2848    if (ox_flag) {
2849        cr |= CRF_SO;
2850    }
2851    *r = ret;
2852
2853    return cr;
2854}
2855
2856uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2857{
2858    uint64_t mask;
2859    uint32_t ox_flag = 0;
2860    int i = a->VsrSH(3) + 1;
2861    ppc_avr_t ret = *b;
2862
2863    if (bcd_is_valid(b) == false) {
2864        return CRF_SO;
2865    }
2866
2867    if (i > 16 && i < 32) {
2868        mask = (uint64_t)-1 >> (128 - i * 4);
2869        if (ret.VsrD(0) & ~mask) {
2870            ox_flag = CRF_SO;
2871        }
2872
2873        ret.VsrD(0) &= mask;
2874    } else if (i >= 0 && i <= 16) {
2875        mask = (uint64_t)-1 >> (64 - i * 4);
2876        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2877            ox_flag = CRF_SO;
2878        }
2879
2880        ret.VsrD(1) &= mask;
2881        ret.VsrD(0) = 0;
2882    }
2883    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2884    *r = ret;
2885
2886    return bcd_cmp_zero(&ret) | ox_flag;
2887}
2888
2889uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2890{
2891    int i;
2892    uint64_t mask;
2893    uint32_t ox_flag = 0;
2894    int invalid = 0;
2895    ppc_avr_t ret = *b;
2896
2897    for (i = 0; i < 32; i++) {
2898        bcd_get_digit(b, i, &invalid);
2899
2900        if (unlikely(invalid)) {
2901            return CRF_SO;
2902        }
2903    }
2904
2905    i = a->VsrSH(3);
2906    if (i > 16 && i < 33) {
2907        mask = (uint64_t)-1 >> (128 - i * 4);
2908        if (ret.VsrD(0) & ~mask) {
2909            ox_flag = CRF_SO;
2910        }
2911
2912        ret.VsrD(0) &= mask;
2913    } else if (i > 0 && i <= 16) {
2914        mask = (uint64_t)-1 >> (64 - i * 4);
2915        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2916            ox_flag = CRF_SO;
2917        }
2918
2919        ret.VsrD(1) &= mask;
2920        ret.VsrD(0) = 0;
2921    } else if (i == 0) {
2922        if (ret.VsrD(0) || ret.VsrD(1)) {
2923            ox_flag = CRF_SO;
2924        }
2925        ret.VsrD(0) = ret.VsrD(1) = 0;
2926    }
2927
2928    *r = ret;
2929    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2930        return ox_flag | CRF_EQ;
2931    }
2932
2933    return ox_flag | CRF_GT;
2934}
2935
2936void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2937{
2938    int i;
2939    VECTOR_FOR_INORDER_I(i, u8) {
2940        r->u8[i] = AES_sbox[a->u8[i]];
2941    }
2942}
2943
2944void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2945{
2946    ppc_avr_t result;
2947    int i;
2948
2949    VECTOR_FOR_INORDER_I(i, u32) {
2950        result.VsrW(i) = b->VsrW(i) ^
2951            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2952             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2953             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2954             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2955    }
2956    *r = result;
2957}
2958
2959void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2960{
2961    ppc_avr_t result;
2962    int i;
2963
2964    VECTOR_FOR_INORDER_I(i, u8) {
2965        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2966    }
2967    *r = result;
2968}
2969
2970void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2971{
2972    /* This differs from what is written in ISA V2.07.  The RTL is */
2973    /* incorrect and will be fixed in V2.07B.                      */
2974    int i;
2975    ppc_avr_t tmp;
2976
2977    VECTOR_FOR_INORDER_I(i, u8) {
2978        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2979    }
2980
2981    VECTOR_FOR_INORDER_I(i, u32) {
2982        r->VsrW(i) =
2983            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2984            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2985            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2986            AES_imc[tmp.VsrB(4 * i + 3)][3];
2987    }
2988}
2989
2990void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2991{
2992    ppc_avr_t result;
2993    int i;
2994
2995    VECTOR_FOR_INORDER_I(i, u8) {
2996        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2997    }
2998    *r = result;
2999}
3000
3001void helper_vshasigmaw(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
3002{
3003    int st = (st_six & 0x10) != 0;
3004    int six = st_six & 0xF;
3005    int i;
3006
3007    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
3008        if (st == 0) {
3009            if ((six & (0x8 >> i)) == 0) {
3010                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
3011                             ror32(a->VsrW(i), 18) ^
3012                             (a->VsrW(i) >> 3);
3013            } else { /* six.bit[i] == 1 */
3014                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
3015                             ror32(a->VsrW(i), 19) ^
3016                             (a->VsrW(i) >> 10);
3017            }
3018        } else { /* st == 1 */
3019            if ((six & (0x8 >> i)) == 0) {
3020                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
3021                             ror32(a->VsrW(i), 13) ^
3022                             ror32(a->VsrW(i), 22);
3023            } else { /* six.bit[i] == 1 */
3024                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
3025                             ror32(a->VsrW(i), 11) ^
3026                             ror32(a->VsrW(i), 25);
3027            }
3028        }
3029    }
3030}
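/*
 * These rotate/shift combinations are the SHA-256 sigma functions:
 * st = 0 selects the lower-case sigma0/sigma1 message-schedule forms,
 * st = 1 the upper-case Sigma0/Sigma1 compression forms, and one bit
 * of `six` picks the 0 or 1 variant for each word.  vshasigmad below
 * is the SHA-512 analogue on doublewords.
 */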
3031
3032void helper_vshasigmad(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
3033{
3034    int st = (st_six & 0x10) != 0;
3035    int six = st_six & 0xF;
3036    int i;
3037
3038    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
3039        if (st == 0) {
3040            if ((six & (0x8 >> (2 * i))) == 0) {
3041                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
3042                             ror64(a->VsrD(i), 8) ^
3043                             (a->VsrD(i) >> 7);
3044            } else { /* six.bit[2*i] == 1 */
3045                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
3046                             ror64(a->VsrD(i), 61) ^
3047                             (a->VsrD(i) >> 6);
3048            }
3049        } else { /* st == 1 */
3050            if ((six & (0x8 >> (2 * i))) == 0) {
3051                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3052                             ror64(a->VsrD(i), 34) ^
3053                             ror64(a->VsrD(i), 39);
3054            } else { /* six.bit[2*i] == 1 */
3055                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3056                             ror64(a->VsrD(i), 18) ^
3057                             ror64(a->VsrD(i), 41);
3058            }
3059        }
3060    }
3061}
3062
3063void helper_vpermxor(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3064{
3065    ppc_avr_t result;
3066    int i;
3067
3068    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3069        int indexA = c->VsrB(i) >> 4;
3070        int indexB = c->VsrB(i) & 0xF;
3071
3072        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
3073    }
3074    *r = result;
3075}
3076
3077#undef VECTOR_FOR_INORDER_I
3078
3079/*****************************************************************************/
3080/* SPE extension helpers */
3081/* Use a table to make this quicker */
3082static const uint8_t hbrev[16] = {
3083    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3084    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3085};
3086
3087static inline uint8_t byte_reverse(uint8_t val)
3088{
3089    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3090}
3091
3092static inline uint32_t word_reverse(uint32_t val)
3093{
3094    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3095        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3096}
3097
3098#define MASKBITS 16 /* Arbitrary placeholder - the real width is implementation dependent */
3099target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3100{
3101    uint32_t a, b, d, mask;
3102
3103    mask = UINT32_MAX >> (32 - MASKBITS);
3104    a = arg1 & mask;
3105    b = arg2 & mask;
3106    d = word_reverse(1 + word_reverse(a | ~b));
3107    return (arg1 & ~mask) | (d & b);
3108}
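/*
 * brinc sketch: within the MASKBITS-wide window selected by arg2, the
 * helper performs a bit-reversed increment, the address sequence used
 * by FFT butterflies.  With a 3-bit window the counter visits
 * 000, 100, 010, 110, 001, ...: reverse, add one, reverse back.
 */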
3109
3110uint32_t helper_cntlsw32(uint32_t val)
3111{
3112    if (val & 0x80000000) {
3113        return clz32(~val);
3114    } else {
3115        return clz32(val);
3116    }
3117}
3118
3119uint32_t helper_cntlzw32(uint32_t val)
3120{
3121    return clz32(val);
3122}
3123
3124/* 440 specific */
3125target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3126                          target_ulong low, uint32_t update_Rc)
3127{
3128    target_ulong mask;
3129    int i;
3130
3131    i = 1;
3132    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3133        if ((high & mask) == 0) {
3134            if (update_Rc) {
3135                env->crf[0] = 0x4;
3136            }
3137            goto done;
3138        }
3139        i++;
3140    }
3141    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3142        if ((low & mask) == 0) {
3143            if (update_Rc) {
3144                env->crf[0] = 0x8;
3145            }
3146            goto done;
3147        }
3148        i++;
3149    }
3150    i = 8;
3151    if (update_Rc) {
3152        env->crf[0] = 0x2;
3153    }
3154 done:
3155    env->xer = (env->xer & ~0x7F) | i;
3156    if (update_Rc) {
3157        env->crf[0] |= xer_so;
3158    }
3159    return i;
3160}
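/*
 * dlmzb sketch: the byte scan above returns, in i and in the low bits
 * of XER, the 1-based position of the leftmost zero byte of the
 * high:low pair (8 if there is none).  E.g. high = 0x41424300 stops
 * with i = 4 and, when Rc is set, CR0 = 0x4 | XER[SO] (zero found in
 * the high word).
 */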
3161