qemu/target/ppc/int_helper.c
/*
 *  PowerPC integer and vector emulation helpers for QEMU.
 *
 *  Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}
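
/*
 * Example: divweu with ra = 1, rb = 2 divides 0x1_0000_0000 by 2,
 * giving 0x8000_0000, which still fits in 32 bits, so OV stays clear.
 * With ra = 1, rb = 1 the quotient 0x1_0000_0000 overflows; the result
 * is then undefined (0 here) and OV is set when oe is nonzero.
 */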

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * Subtract 1 from each byte, AND with the inverse, and check whether the
 * MSB is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue
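
/*
 * Worked example: with ra = 0xab and rb = 0x00ab000000000000,
 * rb ^ pattern(0xab) leaves a zero byte exactly in the matching
 * position; subtracting pattern(0x01) borrows into bit 7 of that byte
 * while ~v keeps it, so haszero() is nonzero and cmpeqb returns CRF_GT.
 */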

/*
 * Return a random number.
 */
uint64_t helper_darn32(void)
{
    Error *err = NULL;
    uint32_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_darn64(void)
{
    Error *err = NULL;
    uint64_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}
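
/*
 * bpermd gathers eight arbitrary bits of rb: byte i of rs holds a bit
 * index (big-endian numbering, as PPC_BIT), and bit i of the result is
 * set when that bit of rb is set; indexes of 64 or more select 0.
 */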

#endif

target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}
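
/*
 * Example: cmpb(0x1122334455667788, 0x1100330055007700) returns
 * 0xff00ff00ff00ff00 on a 64-bit target; each result byte is all ones
 * where the corresponding bytes of rs and rb are equal.
 */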

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}
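
/*
 * CA (and CA32) is set only when the source is negative and at least
 * one 1 bit was shifted out, i.e. exactly when the algebraic shift
 * differs from a divide that rounds toward zero; shift amounts of
 * 32..63 replicate the sign bit across the whole result.
 */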

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words.  */
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    return val;
}
#endif
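
/*
 * These are the classic pairwise popcount folds, stopped early so each
 * count stays within its byte (popcntb) or word (popcntw). Example:
 * helper_popcntb(0xff01) returns 0x0801, a per-byte bit count.
 */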

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU
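
/*
 * Example: cvtsdsw(0x100000000LL, &sat) clamps to INT32_MAX and sets
 * *sat, which the callers below fold into VSCR[SAT].
 */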

void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        r->VsrB(i) = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        r->VsrB(i) = j++;
    }
}
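
/*
 * lvsl/lvsr build the permute control vectors used with vperm to
 * realign unaligned vector loads: lvsl yields bytes sh..sh+15, lvsr
 * yields bytes 16-sh..31-sh.
 */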

void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    env->vscr = vscr & ~(1u << VSCR_SAT);
    /* Which bit we set is completely arbitrary, but clear the rest.  */
    env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
    env->vscr_sat.u64[1] = 0;
    set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
    return env->vscr | (sat << VSCR_SAT);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary.  */
    env->vscr_sat.u32[0] = 1;
}

void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}
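
/*
 * a + b carries out of 32 bits exactly when b > 0xffffffff - a, i.e.
 * when ~a < b, so each element of the result is the carry-out bit of
 * the corresponding unsigned add.
 */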

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}
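
/*
 * In all three vprtyb helpers the XOR folds gather the low-order bit
 * of every byte into bit 0, so each result element is the parity of
 * its bytes' least-significant bits.
 */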

#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                           ppc_avr_t *b, ppc_avr_t *c)                  \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
                        ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            VARITHSAT_CASE(optype, op, cvt, element);                   \
        }                                                               \
        if (sat) {                                                      \
            vscr_sat->u32[0] = 1;                                       \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG
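
/*
 * The averages are computed in a type twice as wide so the +1 rounding
 * term cannot overflow: avgub(0xff, 0xff) is (0xff + 0xff + 1) >> 1 =
 * 0xff in uint16_t arithmetic.
 */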

#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
            (a->element[i] - b->element[i]) :                           \
            (b->element[i] - a->element[i]);                            \
    }                                                                   \
}

/*
 * VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP
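
/*
 * In the _dot forms CR6 gets the usual AltiVec summary: bit 3 (value
 * 8) when the predicate held for every element, bit 1 (value 2) when
 * it held for none.
 */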

#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,              \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
{                                                                       \
    etype ones = (etype)-1;                                             \
    etype all = ones;                                                   \
    etype result, none = 0;                                             \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        if (cmpzero) {                                                  \
            result = ((a->element[i] == 0)                              \
                           || (b->element[i] == 0)                      \
                           || (a->element[i] != b->element[i]) ?        \
                           ones : 0x0);                                 \
        } else {                                                        \
            result = (a->element[i] != b->element[i]) ? ones : 0x0;     \
        }                                                               \
        r->element[i] = result;                                         \
        all &= result;                                                  \
        none |= result;                                                 \
    }                                                                   \
    if (record) {                                                       \
        env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);           \
    }                                                                   \
}

/*
 * VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f32[i], b->f32[i],       \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

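/*
 * vcmpbfp is a bounds check against [-b, b]: per element, bit 31 of
 * the result is set when a > b and bit 30 when a < -b; NaN operands
 * set both. The _dot form sets CR6 bit 1 (value 2) only when every
 * element is within bounds.
 */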
static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        int le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
                                           &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f32[i]);
            int ge_rel = float32_compare_quiet(a->f32[i], bneg,
                                               &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, access, ofs)                                  \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
    {                                                                        \
        ppc_avr_t result;                                                    \
        int i, half = ARRAY_SIZE(r->element) / 2;                            \
                                                                             \
        for (i = 0; i < half; i++) {                                         \
            result.access(i * 2 + 0) = a->access(i + ofs);                   \
            result.access(i * 2 + 1) = b->access(i + ofs);                   \
        }                                                                    \
        *r = result;                                                         \
    }

#define VMRG(suffix, element, access)          \
    VMRG_DO(mrgl##suffix, element, access, half)   \
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
                                     (cast)b->mul_access(i);            \
        }                                                               \
    }

#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
                                     (cast)b->mul_access(i + 1);        \
        }                                                               \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast)       \
    VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast)  \
    VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
VMUL(sb, s8, VsrSB, VsrSH, int16_t)
VMUL(sh, s16, VsrSH, VsrSW, int32_t)
VMUL(sw, s32, VsrSW, VsrSD, int64_t)
VMUL(ub, u8, VsrB, VsrH, uint16_t)
VMUL(uh, u16, VsrH, VsrW, uint32_t)
VMUL(uw, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL

void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = s & 0xf;

        if (s & 0x10) {
            result.VsrB(i) = b->VsrB(index);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = 15 - (s & 0xf);

        if (s & 0x10) {
            result.VsrB(i) = a->VsrB(index);
        } else {
            result.VsrB(i) = b->VsrB(index);
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

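/*
 * vbpermd/vbpermq treat b as a vector of bit indexes into a: each
 * index byte selects one bit of the corresponding doubleword of a
 * (vbpermd) or of the full 128-bit a (vbpermq), and the selected bits
 * are gathered into a small bit field; out-of-range indexes yield 0.
 */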
1168void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1169{
1170    int i, j;
1171    ppc_avr_t result = { .u64 = { 0, 0 } };
1172    VECTOR_FOR_INORDER_I(i, u64) {
1173        for (j = 0; j < 8; j++) {
1174            int index = VBPERMQ_INDEX(b, (i * 8) + j);
1175            if (index < 64 && EXTRACT_BIT(a, i, index)) {
1176                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
1177            }
1178        }
1179    }
1180    *r = result;
1181}
1182
1183void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1184{
1185    int i;
1186    uint64_t perm = 0;
1187
1188    VECTOR_FOR_INORDER_I(i, u8) {
1189        int index = VBPERMQ_INDEX(b, i);
1190
1191        if (index < 128) {
1192            uint64_t mask = (1ull << (63 - (index & 0x3F)));
1193            if (a->u64[VBPERMQ_DW(index)] & mask) {
1194                perm |= (0x8000 >> i);
1195            }
1196        }
1197    }
1198
1199    r->VsrD(0) = perm;
1200    r->VsrD(1) = 0;
1201}
1202
1203#undef VBPERMQ_INDEX
1204#undef VBPERMQ_DW
1205
1206static const uint64_t VGBBD_MASKS[256] = {
1207    0x0000000000000000ull, /* 00 */
1208    0x0000000000000080ull, /* 01 */
1209    0x0000000000008000ull, /* 02 */
1210    0x0000000000008080ull, /* 03 */
1211    0x0000000000800000ull, /* 04 */
1212    0x0000000000800080ull, /* 05 */
1213    0x0000000000808000ull, /* 06 */
1214    0x0000000000808080ull, /* 07 */
1215    0x0000000080000000ull, /* 08 */
1216    0x0000000080000080ull, /* 09 */
1217    0x0000000080008000ull, /* 0A */
1218    0x0000000080008080ull, /* 0B */
1219    0x0000000080800000ull, /* 0C */
1220    0x0000000080800080ull, /* 0D */
1221    0x0000000080808000ull, /* 0E */
1222    0x0000000080808080ull, /* 0F */
1223    0x0000008000000000ull, /* 10 */
1224    0x0000008000000080ull, /* 11 */
1225    0x0000008000008000ull, /* 12 */
1226    0x0000008000008080ull, /* 13 */
1227    0x0000008000800000ull, /* 14 */
1228    0x0000008000800080ull, /* 15 */
1229    0x0000008000808000ull, /* 16 */
1230    0x0000008000808080ull, /* 17 */
1231    0x0000008080000000ull, /* 18 */
1232    0x0000008080000080ull, /* 19 */
1233    0x0000008080008000ull, /* 1A */
1234    0x0000008080008080ull, /* 1B */
1235    0x0000008080800000ull, /* 1C */
1236    0x0000008080800080ull, /* 1D */
1237    0x0000008080808000ull, /* 1E */
1238    0x0000008080808080ull, /* 1F */
1239    0x0000800000000000ull, /* 20 */
1240    0x0000800000000080ull, /* 21 */
1241    0x0000800000008000ull, /* 22 */
1242    0x0000800000008080ull, /* 23 */
1243    0x0000800000800000ull, /* 24 */
1244    0x0000800000800080ull, /* 25 */
1245    0x0000800000808000ull, /* 26 */
1246    0x0000800000808080ull, /* 27 */
1247    0x0000800080000000ull, /* 28 */
1248    0x0000800080000080ull, /* 29 */
1249    0x0000800080008000ull, /* 2A */
1250    0x0000800080008080ull, /* 2B */
1251    0x0000800080800000ull, /* 2C */
1252    0x0000800080800080ull, /* 2D */
1253    0x0000800080808000ull, /* 2E */
1254    0x0000800080808080ull, /* 2F */
1255    0x0000808000000000ull, /* 30 */
1256    0x0000808000000080ull, /* 31 */
1257    0x0000808000008000ull, /* 32 */
1258    0x0000808000008080ull, /* 33 */
1259    0x0000808000800000ull, /* 34 */
1260    0x0000808000800080ull, /* 35 */
1261    0x0000808000808000ull, /* 36 */
1262    0x0000808000808080ull, /* 37 */
1263    0x0000808080000000ull, /* 38 */
1264    0x0000808080000080ull, /* 39 */
1265    0x0000808080008000ull, /* 3A */
1266    0x0000808080008080ull, /* 3B */
1267    0x0000808080800000ull, /* 3C */
1268    0x0000808080800080ull, /* 3D */
1269    0x0000808080808000ull, /* 3E */
1270    0x0000808080808080ull, /* 3F */
1271    0x0080000000000000ull, /* 40 */
1272    0x0080000000000080ull, /* 41 */
1273    0x0080000000008000ull, /* 42 */
1274    0x0080000000008080ull, /* 43 */
1275    0x0080000000800000ull, /* 44 */
1276    0x0080000000800080ull, /* 45 */
1277    0x0080000000808000ull, /* 46 */
1278    0x0080000000808080ull, /* 47 */
1279    0x0080000080000000ull, /* 48 */
1280    0x0080000080000080ull, /* 49 */
1281    0x0080000080008000ull, /* 4A */
1282    0x0080000080008080ull, /* 4B */
1283    0x0080000080800000ull, /* 4C */
1284    0x0080000080800080ull, /* 4D */
1285    0x0080000080808000ull, /* 4E */
1286    0x0080000080808080ull, /* 4F */
1287    0x0080008000000000ull, /* 50 */
1288    0x0080008000000080ull, /* 51 */
1289    0x0080008000008000ull, /* 52 */
1290    0x0080008000008080ull, /* 53 */
1291    0x0080008000800000ull, /* 54 */
1292    0x0080008000800080ull, /* 55 */
1293    0x0080008000808000ull, /* 56 */
1294    0x0080008000808080ull, /* 57 */
1295    0x0080008080000000ull, /* 58 */
1296    0x0080008080000080ull, /* 59 */
1297    0x0080008080008000ull, /* 5A */
1298    0x0080008080008080ull, /* 5B */
1299    0x0080008080800000ull, /* 5C */
1300    0x0080008080800080ull, /* 5D */
1301    0x0080008080808000ull, /* 5E */
1302    0x0080008080808080ull, /* 5F */
1303    0x0080800000000000ull, /* 60 */
1304    0x0080800000000080ull, /* 61 */
1305    0x0080800000008000ull, /* 62 */
1306    0x0080800000008080ull, /* 63 */
1307    0x0080800000800000ull, /* 64 */
1308    0x0080800000800080ull, /* 65 */
1309    0x0080800000808000ull, /* 66 */
1310    0x0080800000808080ull, /* 67 */
1311    0x0080800080000000ull, /* 68 */
1312    0x0080800080000080ull, /* 69 */
1313    0x0080800080008000ull, /* 6A */
1314    0x0080800080008080ull, /* 6B */
1315    0x0080800080800000ull, /* 6C */
1316    0x0080800080800080ull, /* 6D */
1317    0x0080800080808000ull, /* 6E */
1318    0x0080800080808080ull, /* 6F */
1319    0x0080808000000000ull, /* 70 */
1320    0x0080808000000080ull, /* 71 */
1321    0x0080808000008000ull, /* 72 */
1322    0x0080808000008080ull, /* 73 */
1323    0x0080808000800000ull, /* 74 */
1324    0x0080808000800080ull, /* 75 */
1325    0x0080808000808000ull, /* 76 */
1326    0x0080808000808080ull, /* 77 */
1327    0x0080808080000000ull, /* 78 */
1328    0x0080808080000080ull, /* 79 */
1329    0x0080808080008000ull, /* 7A */
1330    0x0080808080008080ull, /* 7B */
1331    0x0080808080800000ull, /* 7C */
1332    0x0080808080800080ull, /* 7D */
1333    0x0080808080808000ull, /* 7E */
1334    0x0080808080808080ull, /* 7F */
1335    0x8000000000000000ull, /* 80 */
1336    0x8000000000000080ull, /* 81 */
1337    0x8000000000008000ull, /* 82 */
1338    0x8000000000008080ull, /* 83 */
1339    0x8000000000800000ull, /* 84 */
1340    0x8000000000800080ull, /* 85 */
1341    0x8000000000808000ull, /* 86 */
1342    0x8000000000808080ull, /* 87 */
1343    0x8000000080000000ull, /* 88 */
1344    0x8000000080000080ull, /* 89 */
1345    0x8000000080008000ull, /* 8A */
1346    0x8000000080008080ull, /* 8B */
1347    0x8000000080800000ull, /* 8C */
1348    0x8000000080800080ull, /* 8D */
1349    0x8000000080808000ull, /* 8E */
1350    0x8000000080808080ull, /* 8F */
1351    0x8000008000000000ull, /* 90 */
1352    0x8000008000000080ull, /* 91 */
1353    0x8000008000008000ull, /* 92 */
1354    0x8000008000008080ull, /* 93 */
1355    0x8000008000800000ull, /* 94 */
1356    0x8000008000800080ull, /* 95 */
1357    0x8000008000808000ull, /* 96 */
1358    0x8000008000808080ull, /* 97 */
1359    0x8000008080000000ull, /* 98 */
1360    0x8000008080000080ull, /* 99 */
1361    0x8000008080008000ull, /* 9A */
1362    0x8000008080008080ull, /* 9B */
1363    0x8000008080800000ull, /* 9C */
1364    0x8000008080800080ull, /* 9D */
1365    0x8000008080808000ull, /* 9E */
1366    0x8000008080808080ull, /* 9F */
1367    0x8000800000000000ull, /* A0 */
1368    0x8000800000000080ull, /* A1 */
1369    0x8000800000008000ull, /* A2 */
1370    0x8000800000008080ull, /* A3 */
1371    0x8000800000800000ull, /* A4 */
1372    0x8000800000800080ull, /* A5 */
1373    0x8000800000808000ull, /* A6 */
1374    0x8000800000808080ull, /* A7 */
1375    0x8000800080000000ull, /* A8 */
1376    0x8000800080000080ull, /* A9 */
1377    0x8000800080008000ull, /* AA */
1378    0x8000800080008080ull, /* AB */
1379    0x8000800080800000ull, /* AC */
1380    0x8000800080800080ull, /* AD */
1381    0x8000800080808000ull, /* AE */
1382    0x8000800080808080ull, /* AF */
1383    0x8000808000000000ull, /* B0 */
1384    0x8000808000000080ull, /* B1 */
1385    0x8000808000008000ull, /* B2 */
1386    0x8000808000008080ull, /* B3 */
1387    0x8000808000800000ull, /* B4 */
1388    0x8000808000800080ull, /* B5 */
1389    0x8000808000808000ull, /* B6 */
1390    0x8000808000808080ull, /* B7 */
1391    0x8000808080000000ull, /* B8 */
1392    0x8000808080000080ull, /* B9 */
1393    0x8000808080008000ull, /* BA */
1394    0x8000808080008080ull, /* BB */
1395    0x8000808080800000ull, /* BC */
1396    0x8000808080800080ull, /* BD */
1397    0x8000808080808000ull, /* BE */
1398    0x8000808080808080ull, /* BF */
1399    0x8080000000000000ull, /* C0 */
1400    0x8080000000000080ull, /* C1 */
1401    0x8080000000008000ull, /* C2 */
1402    0x8080000000008080ull, /* C3 */
1403    0x8080000000800000ull, /* C4 */
1404    0x8080000000800080ull, /* C5 */
1405    0x8080000000808000ull, /* C6 */
1406    0x8080000000808080ull, /* C7 */
1407    0x8080000080000000ull, /* C8 */
1408    0x8080000080000080ull, /* C9 */
1409    0x8080000080008000ull, /* CA */
1410    0x8080000080008080ull, /* CB */
1411    0x8080000080800000ull, /* CC */
1412    0x8080000080800080ull, /* CD */
1413    0x8080000080808000ull, /* CE */
1414    0x8080000080808080ull, /* CF */
1415    0x8080008000000000ull, /* D0 */
1416    0x8080008000000080ull, /* D1 */
1417    0x8080008000008000ull, /* D2 */
1418    0x8080008000008080ull, /* D3 */
1419    0x8080008000800000ull, /* D4 */
1420    0x8080008000800080ull, /* D5 */
1421    0x8080008000808000ull, /* D6 */
1422    0x8080008000808080ull, /* D7 */
1423    0x8080008080000000ull, /* D8 */
1424    0x8080008080000080ull, /* D9 */
1425    0x8080008080008000ull, /* DA */
1426    0x8080008080008080ull, /* DB */
1427    0x8080008080800000ull, /* DC */
1428    0x8080008080800080ull, /* DD */
1429    0x8080008080808000ull, /* DE */
1430    0x8080008080808080ull, /* DF */
1431    0x8080800000000000ull, /* E0 */
1432    0x8080800000000080ull, /* E1 */
1433    0x8080800000008000ull, /* E2 */
1434    0x8080800000008080ull, /* E3 */
1435    0x8080800000800000ull, /* E4 */
1436    0x8080800000800080ull, /* E5 */
1437    0x8080800000808000ull, /* E6 */
1438    0x8080800000808080ull, /* E7 */
1439    0x8080800080000000ull, /* E8 */
1440    0x8080800080000080ull, /* E9 */
1441    0x8080800080008000ull, /* EA */
1442    0x8080800080008080ull, /* EB */
1443    0x8080800080800000ull, /* EC */
1444    0x8080800080800080ull, /* ED */
1445    0x8080800080808000ull, /* EE */
1446    0x8080800080808080ull, /* EF */
1447    0x8080808000000000ull, /* F0 */
1448    0x8080808000000080ull, /* F1 */
1449    0x8080808000008000ull, /* F2 */
1450    0x8080808000008080ull, /* F3 */
1451    0x8080808000800000ull, /* F4 */
1452    0x8080808000800080ull, /* F5 */
1453    0x8080808000808000ull, /* F6 */
1454    0x8080808000808080ull, /* F7 */
1455    0x8080808080000000ull, /* F8 */
1456    0x8080808080000080ull, /* F9 */
1457    0x8080808080008000ull, /* FA */
1458    0x8080808080008080ull, /* FB */
1459    0x8080808080800000ull, /* FC */
1460    0x8080808080800080ull, /* FD */
1461    0x8080808080808000ull, /* FE */
1462    0x8080808080808080ull, /* FF */
1463};
1464
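    /*
     * vgbbd (Vector Gather Bits by Bytes by Doubleword) transposes the
     * 8x8 bit matrix held in each doubleword: bit 7-j of source byte i
     * becomes bit 7-i of result byte j.  Each table entry above scatters
     * the bits of one byte value to bit 7 of the eight result bytes; the
     * shift by (i & 7) below then moves them into bit column 7-(i & 7).
     * Worked example: a doubleword whose first byte is 0xFF (rest zero)
     * gathers to 0x8080808080808080.
     */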
1465void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1466{
1467    int i;
1468    uint64_t t[2] = { 0, 0 };
1469
1470    VECTOR_FOR_INORDER_I(i, u8) {
1471#if defined(HOST_WORDS_BIGENDIAN)
1472        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1473#else
1474        t[i >> 3] |= VGBBD_MASKS[b->u8[i]] >> (7 - (i & 7));
1475#endif
1476    }
1477
1478    r->u64[0] = t[0];
1479    r->u64[1] = t[1];
1480}
1481
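    /*
     * PMSUM implements the vpmsum{b,h,w} polynomial multiply-sum: each
     * pair of corresponding elements is multiplied carry-lessly (over
     * GF(2), additions are XOR), and adjacent even/odd products are
     * XORed into one double-width target element.  Worked example for
     * vpmsumb: 0x03 * 0x06 is (x + 1)(x^2 + x) = x^3 + x = 0x0A, since
     * the two x^2 terms cancel.
     */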
1482#define PMSUM(name, srcfld, trgfld, trgtyp)                   \
1483void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
1484{                                                             \
1485    int i, j;                                                 \
1486    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];    \
1487                                                              \
1488    VECTOR_FOR_INORDER_I(i, srcfld) {                         \
1489        prod[i] = 0;                                          \
1490        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
1491            if (a->srcfld[i] & (1ull << j)) {                 \
1492                prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
1493            }                                                 \
1494        }                                                     \
1495    }                                                         \
1496                                                              \
1497    VECTOR_FOR_INORDER_I(i, trgfld) {                         \
1498        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];         \
1499    }                                                         \
1500}
1501
1502PMSUM(vpmsumb, u8, u16, uint16_t)
1503PMSUM(vpmsumh, u16, u32, uint32_t)
1504PMSUM(vpmsumw, u32, u64, uint64_t)
1505
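    /*
     * vpmsumd is the 64x64 -> 128 bit carry-less multiply-sum, commonly
     * used to accelerate CRC and GCM-style hashing.  Lacking a host
     * 128-bit type, the fallback assembles each shifted multiplicand
     * from two halves: for a shift of j the high doubleword is
     * b >> (64 - j) and the low one is b << j; j == 0 is special-cased
     * because shifting a 64-bit value by 64 is undefined in C.
     */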
1506void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1507{
1508
1509#ifdef CONFIG_INT128
1510    int i, j;
1511    __uint128_t prod[2];
1512
1513    VECTOR_FOR_INORDER_I(i, u64) {
1514        prod[i] = 0;
1515        for (j = 0; j < 64; j++) {
1516            if (a->u64[i] & (1ull << j)) {
1517                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1518            }
1519        }
1520    }
1521
1522    r->u128 = prod[0] ^ prod[1];
1523
1524#else
1525    int i, j;
1526    ppc_avr_t prod[2];
1527
1528    VECTOR_FOR_INORDER_I(i, u64) {
1529        prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
1530        for (j = 0; j < 64; j++) {
1531            if (a->u64[i] & (1ull << j)) {
1532                ppc_avr_t bshift;
1533                if (j == 0) {
1534                    bshift.VsrD(0) = 0;
1535                    bshift.VsrD(1) = b->u64[i];
1536                } else {
1537                    bshift.VsrD(0) = b->u64[i] >> (64 - j);
1538                    bshift.VsrD(1) = b->u64[i] << j;
1539                }
1540                prod[i].VsrD(1) ^= bshift.VsrD(1);
1541                prod[i].VsrD(0) ^= bshift.VsrD(0);
1542            }
1543        }
1544    }
1545
1546    r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
1547    r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
1548#endif
1549}
1550
1551
1552#if defined(HOST_WORDS_BIGENDIAN)
1553#define PKBIG 1
1554#else
1555#define PKBIG 0
1556#endif
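    /*
     * vpkpx packs each 32-bit 8:8:8:8 pixel into a 16-bit 1:5:5:5 pixel,
     * keeping the low bit of the most significant byte and the top five
     * bits of each of the three colour bytes, e.g. 0x01FFFFFF -> 0xFFFF.
     */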
1557void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1558{
1559    int i, j;
1560    ppc_avr_t result;
1561#if defined(HOST_WORDS_BIGENDIAN)
1562    const ppc_avr_t *x[2] = { a, b };
1563#else
1564    const ppc_avr_t *x[2] = { b, a };
1565#endif
1566
1567    VECTOR_FOR_INORDER_I(i, u64) {
1568        VECTOR_FOR_INORDER_I(j, u32) {
1569            uint32_t e = x[i]->u32[j];
1570
1571            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
1572                                     ((e >> 6) & 0x3e0) |
1573                                     ((e >> 3) & 0x1f));
1574        }
1575    }
1576    *r = result;
1577}
1578
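    /*
     * VPK packs the elements of two source vectors into one vector of
     * half-width elements via the cvt conversion; the saturating forms
     * record any clamping in sat and set VSCR[SAT].  E.g. vpkshss
     * converts s16 0x7FFF to s8 0x7F and saturates, while vpkuhum (cvt
     * is the identity macro I) just truncates.
     */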
1579#define VPK(suffix, from, to, cvt, dosat)                               \
1580    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
1581                            ppc_avr_t *a, ppc_avr_t *b)                 \
1582    {                                                                   \
1583        int i;                                                          \
1584        int sat = 0;                                                    \
1585        ppc_avr_t result;                                               \
1586        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
1587        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
1588                                                                        \
1589        VECTOR_FOR_INORDER_I(i, from) {                                 \
1590            result.to[i] = cvt(a0->from[i], &sat);                      \
1591            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
1592        }                                                               \
1593        *r = result;                                                    \
1594        if (dosat && sat) {                                             \
1595            set_vscr_sat(env);                                          \
1596        }                                                               \
1597    }
1598#define I(x, y) (x)
1599VPK(shss, s16, s8, cvtshsb, 1)
1600VPK(shus, s16, u8, cvtshub, 1)
1601VPK(swss, s32, s16, cvtswsh, 1)
1602VPK(swus, s32, u16, cvtswuh, 1)
1603VPK(sdss, s64, s32, cvtsdsw, 1)
1604VPK(sdus, s64, u32, cvtsduw, 1)
1605VPK(uhus, u16, u8, cvtuhub, 1)
1606VPK(uwus, u32, u16, cvtuwuh, 1)
1607VPK(udus, u64, u32, cvtuduw, 1)
1608VPK(uhum, u16, u8, I, 0)
1609VPK(uwum, u32, u16, I, 0)
1610VPK(udum, u64, u32, I, 0)
1611#undef I
1612#undef VPK
1613#undef PKBIG
1614
1615void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1616{
1617    int i;
1618
1619    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1620        r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1621    }
1622}
1623
1624#define VRFI(suffix, rounding)                                  \
1625    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
1626                             ppc_avr_t *b)                      \
1627    {                                                           \
1628        int i;                                                  \
1629        float_status s = env->vec_status;                       \
1630                                                                \
1631        set_float_rounding_mode(rounding, &s);                  \
1632        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {              \
1633            r->f32[i] = float32_round_to_int(b->f32[i], &s);    \
1634        }                                                       \
1635    }
1636VRFI(n, float_round_nearest_even)
1637VRFI(m, float_round_down)
1638VRFI(p, float_round_up)
1639VRFI(z, float_round_to_zero)
1640#undef VRFI
1641
1642#define VROTATE(suffix, element, mask)                                  \
1643    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1644    {                                                                   \
1645        int i;                                                          \
1646                                                                        \
1647        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1648            unsigned int shift = b->element[i] & mask;                  \
1649            r->element[i] = (a->element[i] << shift) |                  \
1650                (a->element[i] >> (-shift & mask)); /* shift 0 safe */  \
1651        }                                                               \
1652    }
1653VROTATE(b, u8, 0x7)
1654VROTATE(h, u16, 0xF)
1655VROTATE(w, u32, 0x1F)
1656VROTATE(d, u64, 0x3F)
1657#undef VROTATE
1658
1659void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1660{
1661    int i;
1662
1663    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1664        float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1665
1666        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1667    }
1668}
1669
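    /*
     * VRLMI covers the rotate-left-then-mask instructions: the *mi forms
     * insert the rotated value under the mask (the old target supplies
     * the other bits), the *nm forms just mask it.  Per element, b holds
     * the rotate count in bits 0-5 and the mask end/begin positions in
     * bits 8-13 and 16-21 (LSB-based numbering).
     */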
1670#define VRLMI(name, size, element, insert)                            \
1671void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)          \
1672{                                                                     \
1673    int i;                                                            \
1674    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
1675        uint##size##_t src1 = a->element[i];                          \
1676        uint##size##_t src2 = b->element[i];                          \
1677        uint##size##_t src3 = r->element[i];                          \
1678        uint##size##_t begin, end, shift, mask, rot_val;              \
1679                                                                      \
1680        shift = extract##size(src2, 0, 6);                            \
1681        end   = extract##size(src2, 8, 6);                            \
1682        begin = extract##size(src2, 16, 6);                           \
1683        rot_val = rol##size(src1, shift);                             \
1684        mask = mask_u##size(begin, end);                              \
1685        if (insert) {                                                 \
1686            r->element[i] = (rot_val & mask) | (src3 & ~mask);        \
1687        } else {                                                      \
1688            r->element[i] = (rot_val & mask);                         \
1689        }                                                             \
1690    }                                                                 \
1691}
1692
1693VRLMI(vrldmi, 64, u64, 1);
1694VRLMI(vrlwmi, 32, u32, 1);
1695VRLMI(vrldnm, 64, u64, 0);
1696VRLMI(vrlwnm, 32, u32, 0);
1697
1698void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1699                 ppc_avr_t *c)
1700{
1701    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1702    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1703}
1704
1705void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1706{
1707    int i;
1708
1709    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1710        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1711    }
1712}
1713
1714void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1715{
1716    int i;
1717
1718    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1719        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1720    }
1721}
1722
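    /*
     * vextu[bhw][lr]x extract an unsigned element of the given width at
     * byte offset (a & 0xf) of the 128-bit source, counting the offset
     * from one end for the left-indexed forms and from the other for the
     * right-indexed ones; either way it is a 128-bit right shift plus a
     * mask of the low bits.
     */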
1723#if defined(HOST_WORDS_BIGENDIAN)
1724#define VEXTU_X_DO(name, size, left)                                \
1725    target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
1726    {                                                               \
1727        int index;                                                  \
1728        if (left) {                                                 \
1729            index = (a & 0xf) * 8;                                  \
1730        } else {                                                    \
1731            index = ((15 - (a & 0xf) + 1) * 8) - size;              \
1732        }                                                           \
1733        return int128_getlo(int128_rshift(b->s128, index)) &        \
1734            MAKE_64BIT_MASK(0, size);                               \
1735    }
1736#else
1737#define VEXTU_X_DO(name, size, left)                                \
1738    target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
1739    {                                                               \
1740        int index;                                                  \
1741        if (left) {                                                 \
1742            index = ((15 - (a & 0xf) + 1) * 8) - size;              \
1743        } else {                                                    \
1744            index = (a & 0xf) * 8;                                  \
1745        }                                                           \
1746        return int128_getlo(int128_rshift(b->s128, index)) &        \
1747            MAKE_64BIT_MASK(0, size);                               \
1748    }
1749#endif
1750
1751VEXTU_X_DO(vextublx,  8, 1)
1752VEXTU_X_DO(vextuhlx, 16, 1)
1753VEXTU_X_DO(vextuwlx, 32, 1)
1754VEXTU_X_DO(vextubrx,  8, 0)
1755VEXTU_X_DO(vextuhrx, 16, 0)
1756VEXTU_X_DO(vextuwrx, 32, 0)
1757#undef VEXTU_X_DO
1758
1759/*
1760 * The specification says that the results are undefined if all of the
1761 * shift counts are not identical.  We check that they are, and only
1762 * shift in that case, to conform to what real hardware appears to do.
1763 */
1764#define VSHIFT(suffix, leftp)                                           \
1765    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
1766    {                                                                   \
1767        int shift = b->VsrB(15) & 0x7;                                  \
1768        int doit = 1;                                                   \
1769        int i;                                                          \
1770                                                                        \
1771        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
1772            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
1773        }                                                               \
1774        if (doit) {                                                     \
1775            if (shift == 0) {                                           \
1776                *r = *a;                                                \
1777            } else if (leftp) {                                         \
1778                uint64_t carry = a->VsrD(1) >> (64 - shift);            \
1779                                                                        \
1780                r->VsrD(0) = (a->VsrD(0) << shift) | carry;             \
1781                r->VsrD(1) = a->VsrD(1) << shift;                       \
1782            } else {                                                    \
1783                uint64_t carry = a->VsrD(0) << (64 - shift);            \
1784                                                                        \
1785                r->VsrD(1) = (a->VsrD(1) >> shift) | carry;             \
1786                r->VsrD(0) = a->VsrD(0) >> shift;                       \
1787            }                                                           \
1788        }                                                               \
1789    }
1790VSHIFT(l, 1)
1791VSHIFT(r, 0)
1792#undef VSHIFT
1793
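    /*
     * vslv shifts each byte left by the count in the corresponding byte
     * of b, with the vacated bits filled from the next byte to the
     * right.  E.g. adjacent bytes {0x12, 0x34} with a shift of 4 give
     * 0x23 in the first byte.
     */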
1794void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1795{
1796    int i;
1797    unsigned int shift, bytes, size;
1798
1799    size = ARRAY_SIZE(r->u8);
1800    for (i = 0; i < size; i++) {
1801        shift = b->VsrB(i) & 0x7;             /* extract shift value */
1802        bytes = (a->VsrB(i) << 8) +           /* extract adjacent bytes */
1803            (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1804        r->VsrB(i) = (bytes << shift) >> 8;   /* shift and store result */
1805    }
1806}
1807
1808void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1809{
1810    int i;
1811    unsigned int shift, bytes;
1812
1813    /*
1814     * Use reverse order, as destination and source register can be the
1815     * same.  Since the vector is modified in place (saving a temporary),
1816     * reverse order guarantees that a computed byte is not fed back.
1817     */
1818    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1819        shift = b->VsrB(i) & 0x7;               /* extract shift value */
1820        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1821                                                /* extract adjacent bytes */
1822        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
1823    }
1824}
1825
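    /*
     * vsldoi concatenates a and b and extracts 16 bytes starting at byte
     * offset sh (0-15): result byte i is a[sh + i] while that index is
     * below 16, and b[sh + i - 16] beyond it.
     */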
1826void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1827{
1828    int sh = shift & 0xf;
1829    int i;
1830    ppc_avr_t result;
1831
1832    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1833        int index = sh + i;
1834        if (index > 0xf) {
1835            result.VsrB(i) = b->VsrB(index - 0x10);
1836        } else {
1837            result.VsrB(i) = a->VsrB(index);
1838        }
1839    }
1840    *r = result;
1841}
1842
1843void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1844{
1845    int sh = (b->VsrB(0xf) >> 3) & 0xf;
1846
1847#if defined(HOST_WORDS_BIGENDIAN)
1848    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1849    memset(&r->u8[16 - sh], 0, sh);
1850#else
1851    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1852    memset(&r->u8[0], 0, sh);
1853#endif
1854}
1855
1856#if defined(HOST_WORDS_BIGENDIAN)
1857#define VINSERT(suffix, element)                                            \
1858    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1859    {                                                                       \
1860        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])],           \
1861                sizeof(r->element[0]));                                     \
1862    }
1863#else
1864#define VINSERT(suffix, element)                                            \
1865    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1866    {                                                                       \
1867        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
1868        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
1869    }
1870#endif
1871VINSERT(b, u8)
1872VINSERT(h, u16)
1873VINSERT(w, u32)
1874VINSERT(d, u64)
1875#undef VINSERT
1876#if defined(HOST_WORDS_BIGENDIAN)
1877#define VEXTRACT(suffix, element)                                            \
1878    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1879    {                                                                        \
1880        uint32_t es = sizeof(r->element[0]);                                 \
1881        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
1882        memset(&r->u8[8], 0, 8);                                             \
1883        memset(&r->u8[0], 0, 8 - es);                                        \
1884    }
1885#else
1886#define VEXTRACT(suffix, element)                                            \
1887    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1888    {                                                                        \
1889        uint32_t es = sizeof(r->element[0]);                                 \
1890        uint32_t s = (16 - index) - es;                                      \
1891        memmove(&r->u8[8], &b->u8[s], es);                                   \
1892        memset(&r->u8[0], 0, 8);                                             \
1893        memset(&r->u8[8 + es], 0, 8 - es);                                   \
1894    }
1895#endif
1896VEXTRACT(ub, u8)
1897VEXTRACT(uh, u16)
1898VEXTRACT(uw, u32)
1899VEXTRACT(d, u64)
1900#undef VEXTRACT
1901
1902void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1903                        ppc_vsr_t *xb, uint32_t index)
1904{
1905    ppc_vsr_t t = { };
1906    size_t es = sizeof(uint32_t);
1907    uint32_t ext_index;
1908    int i;
1909
1910    ext_index = index;
1911    for (i = 0; i < es; i++, ext_index++) {
1912        t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1913    }
1914
1915    *xt = t;
1916}
1917
1918void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1919                      ppc_vsr_t *xb, uint32_t index)
1920{
1921    ppc_vsr_t t = *xt;
1922    size_t es = sizeof(uint32_t);
1923    int ins_index, i = 0;
1924
1925    ins_index = index;
1926    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1927        t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1928    }
1929
1930    *xt = t;
1931}
1932
1933#define VEXT_SIGNED(name, element, cast)                            \
1934void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
1935{                                                                   \
1936    int i;                                                          \
1937    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1938        r->element[i] = (cast)b->element[i];                        \
1939    }                                                               \
1940}
1941VEXT_SIGNED(vextsb2w, s32, int8_t)
1942VEXT_SIGNED(vextsb2d, s64, int8_t)
1943VEXT_SIGNED(vextsh2w, s32, int16_t)
1944VEXT_SIGNED(vextsh2d, s64, int16_t)
1945VEXT_SIGNED(vextsw2d, s64, int32_t)
1946#undef VEXT_SIGNED
1947
1948#define VNEG(name, element)                                         \
1949void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
1950{                                                                   \
1951    int i;                                                          \
1952    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1953        r->element[i] = -b->element[i];                             \
1954    }                                                               \
1955}
1956VNEG(vnegw, s32)
1957VNEG(vnegd, s64)
1958#undef VNEG
1959
1960void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1961{
1962    int sh = (b->VsrB(0xf) >> 3) & 0xf;
1963
1964#if defined(HOST_WORDS_BIGENDIAN)
1965    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1966    memset(&r->u8[0], 0, sh);
1967#else
1968    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1969    memset(&r->u8[16 - sh], 0, sh);
1970#endif
1971}
1972
1973void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1974{
1975    int i;
1976
1977    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1978        r->u32[i] = a->u32[i] >= b->u32[i];
1979    }
1980}
1981
1982void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1983{
1984    int64_t t;
1985    int i, upper;
1986    ppc_avr_t result;
1987    int sat = 0;
1988
1989    upper = ARRAY_SIZE(r->s32) - 1;
1990    t = (int64_t)b->VsrSW(upper);
1991    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1992        t += a->VsrSW(i);
1993        result.VsrSW(i) = 0;
1994    }
1995    result.VsrSW(upper) = cvtsdsw(t, &sat);
1996    *r = result;
1997
1998    if (sat) {
1999        set_vscr_sat(env);
2000    }
2001}
2002
2003void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2004{
2005    int i, j, upper;
2006    ppc_avr_t result;
2007    int sat = 0;
2008
2009    upper = 1;
2010    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2011        int64_t t = (int64_t)b->VsrSW(upper + i * 2);
2012
2013        result.VsrD(i) = 0;
2014        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2015            t += a->VsrSW(2 * i + j);
2016        }
2017        result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
2018    }
2019
2020    *r = result;
2021    if (sat) {
2022        set_vscr_sat(env);
2023    }
2024}
2025
2026void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2027{
2028    int i, j;
2029    int sat = 0;
2030
2031    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2032        int64_t t = (int64_t)b->s32[i];
2033
2034        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2035            t += a->s8[4 * i + j];
2036        }
2037        r->s32[i] = cvtsdsw(t, &sat);
2038    }
2039
2040    if (sat) {
2041        set_vscr_sat(env);
2042    }
2043}
2044
2045void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2046{
2047    int sat = 0;
2048    int i;
2049
2050    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2051        int64_t t = (int64_t)b->s32[i];
2052
2053        t += a->s16[2 * i] + a->s16[2 * i + 1];
2054        r->s32[i] = cvtsdsw(t, &sat);
2055    }
2056
2057    if (sat) {
2058        set_vscr_sat(env);
2059    }
2060}
2061
2062void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2063{
2064    int i, j;
2065    int sat = 0;
2066
2067    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2068        uint64_t t = (uint64_t)b->u32[i];
2069
2070        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2071            t += a->u8[4 * i + j];
2072        }
2073        r->u32[i] = cvtuduw(t, &sat);
2074    }
2075
2076    if (sat) {
2077        set_vscr_sat(env);
2078    }
2079}
2080
2081#if defined(HOST_WORDS_BIGENDIAN)
2082#define UPKHI 1
2083#define UPKLO 0
2084#else
2085#define UPKHI 0
2086#define UPKLO 1
2087#endif
2088#define VUPKPX(suffix, hi)                                              \
2089    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
2090    {                                                                   \
2091        int i;                                                          \
2092        ppc_avr_t result;                                               \
2093                                                                        \
2094        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
2095            uint16_t e = b->u16[hi ? i : i + 4];                        \
2096            uint8_t a = (e >> 15) ? 0xff : 0;                           \
2097            uint8_t r = (e >> 10) & 0x1f;                               \
2098            uint8_t g = (e >> 5) & 0x1f;                                \
2099            uint8_t b = e & 0x1f;                                       \
2100                                                                        \
2101            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
2102        }                                                               \
2103        *r = result;                                                    \
2104    }
2105VUPKPX(lpx, UPKLO)
2106VUPKPX(hpx, UPKHI)
2107#undef VUPKPX
2108
2109#define VUPK(suffix, unpacked, packee, hi)                              \
2110    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
2111    {                                                                   \
2112        int i;                                                          \
2113        ppc_avr_t result;                                               \
2114                                                                        \
2115        if (hi) {                                                       \
2116            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
2117                result.unpacked[i] = b->packee[i];                      \
2118            }                                                           \
2119        } else {                                                        \
2120            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2121                 i++) {                                                 \
2122                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2123            }                                                           \
2124        }                                                               \
2125        *r = result;                                                    \
2126    }
2127VUPK(hsb, s16, s8, UPKHI)
2128VUPK(hsh, s32, s16, UPKHI)
2129VUPK(hsw, s64, s32, UPKHI)
2130VUPK(lsb, s16, s8, UPKLO)
2131VUPK(lsh, s32, s16, UPKLO)
2132VUPK(lsw, s64, s32, UPKLO)
2133#undef VUPK
2134#undef UPKHI
2135#undef UPKLO
2136
2137#define VGENERIC_DO(name, element)                                      \
2138    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
2139    {                                                                   \
2140        int i;                                                          \
2141                                                                        \
2142        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
2143            r->element[i] = name(b->element[i]);                        \
2144        }                                                               \
2145    }
2146
2147#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2148#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2149#define clzw(v) clz32((v))
2150#define clzd(v) clz64((v))
2151
2152VGENERIC_DO(clzb, u8)
2153VGENERIC_DO(clzh, u16)
2154VGENERIC_DO(clzw, u32)
2155VGENERIC_DO(clzd, u64)
2156
2157#undef clzb
2158#undef clzh
2159#undef clzw
2160#undef clzd
2161
2162#define ctzb(v) ((v) ? ctz32(v) : 8)
2163#define ctzh(v) ((v) ? ctz32(v) : 16)
2164#define ctzw(v) ctz32((v))
2165#define ctzd(v) ctz64((v))
2166
2167VGENERIC_DO(ctzb, u8)
2168VGENERIC_DO(ctzh, u16)
2169VGENERIC_DO(ctzw, u32)
2170VGENERIC_DO(ctzd, u64)
2171
2172#undef ctzb
2173#undef ctzh
2174#undef ctzw
2175#undef ctzd
2176
2177#define popcntb(v) ctpop8(v)
2178#define popcnth(v) ctpop16(v)
2179#define popcntw(v) ctpop32(v)
2180#define popcntd(v) ctpop64(v)
2181
2182VGENERIC_DO(popcntb, u8)
2183VGENERIC_DO(popcnth, u16)
2184VGENERIC_DO(popcntw, u32)
2185VGENERIC_DO(popcntd, u64)
2186
2187#undef popcntb
2188#undef popcnth
2189#undef popcntw
2190#undef popcntd
2191
2192#undef VGENERIC_DO
2193
2194#if defined(HOST_WORDS_BIGENDIAN)
2195#define QW_ONE { .u64 = { 0, 1 } }
2196#else
2197#define QW_ONE { .u64 = { 1, 0 } }
2198#endif
2199
2200#ifndef CONFIG_INT128
2201
2202static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2203{
2204    t->u64[0] = ~a.u64[0];
2205    t->u64[1] = ~a.u64[1];
2206}
2207
2208static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2209{
2210    if (a.VsrD(0) < b.VsrD(0)) {
2211        return -1;
2212    } else if (a.VsrD(0) > b.VsrD(0)) {
2213        return 1;
2214    } else if (a.VsrD(1) < b.VsrD(1)) {
2215        return -1;
2216    } else if (a.VsrD(1) > b.VsrD(1)) {
2217        return 1;
2218    } else {
2219        return 0;
2220    }
2221}
2222
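    /*
     * 128-bit add built from two 64-bit halves: the low doublewords
     * carry out exactly when a.lo + b.lo wraps, i.e. when
     * b.lo > ~a.lo, which is the (~a.VsrD(1) < b.VsrD(1)) term below.
     */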
2223static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2224{
2225    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2226    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2227                     (~a.VsrD(1) < b.VsrD(1));
2228}
2229
2230static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2231{
2232    ppc_avr_t not_a;
2233    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
2234    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
2235                     (~a.VsrD(1) < b.VsrD(1));
2236    avr_qw_not(&not_a, a);
2237    return avr_qw_cmpu(not_a, b) < 0;
2238}
2239
2240#endif
2241
2242void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2243{
2244#ifdef CONFIG_INT128
2245    r->u128 = a->u128 + b->u128;
2246#else
2247    avr_qw_add(r, *a, *b);
2248#endif
2249}
2250
2251void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2252{
2253#ifdef CONFIG_INT128
2254    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2255#else
2256
2257    if (c->VsrD(1) & 1) {
2258        ppc_avr_t tmp;
2259
2260        tmp.VsrD(0) = 0;
2261        tmp.VsrD(1) = c->VsrD(1) & 1;
2262        avr_qw_add(&tmp, *a, tmp);
2263        avr_qw_add(r, tmp, *b);
2264    } else {
2265        avr_qw_add(r, *a, *b);
2266    }
2267#endif
2268}
2269
2270void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2271{
2272#ifdef CONFIG_INT128
2273    r->u128 = (~a->u128 < b->u128);
2274#else
2275    ppc_avr_t not_a;
2276
2277    avr_qw_not(&not_a, *a);
2278
2279    r->VsrD(0) = 0;
2280    r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
2281#endif
2282}
2283
2284void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2285{
2286#ifdef CONFIG_INT128
2287    int carry_out = (~a->u128 < b->u128);
2288    if (!carry_out && (c->u128 & 1)) {
2289        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2290                    ((a->u128 != 0) || (b->u128 != 0));
2291    }
2292    r->u128 = carry_out;
2293#else
2294
2295    int carry_in = c->VsrD(1) & 1;
2296    int carry_out = 0;
2297    ppc_avr_t tmp;
2298
2299    carry_out = avr_qw_addc(&tmp, *a, *b);
2300
2301    if (!carry_out && carry_in) {
2302        ppc_avr_t one = QW_ONE;
2303        carry_out = avr_qw_addc(&tmp, tmp, one);
2304    }
2305    r->VsrD(0) = 0;
2306    r->VsrD(1) = carry_out;
2307#endif
2308}
2309
2310void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2311{
2312#ifdef CONFIG_INT128
2313    r->u128 = a->u128 - b->u128;
2314#else
2315    ppc_avr_t tmp;
2316    ppc_avr_t one = QW_ONE;
2317
2318    avr_qw_not(&tmp, *b);
2319    avr_qw_add(&tmp, *a, tmp);
2320    avr_qw_add(r, tmp, one);
2321#endif
2322}
2323
2324void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2325{
2326#ifdef CONFIG_INT128
2327    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2328#else
2329    ppc_avr_t tmp, sum;
2330
2331    avr_qw_not(&tmp, *b);
2332    avr_qw_add(&sum, *a, tmp);
2333
2334    tmp.VsrD(0) = 0;
2335    tmp.VsrD(1) = c->VsrD(1) & 1;
2336    avr_qw_add(r, sum, tmp);
2337#endif
2338}
2339
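    /*
     * vsubcuq returns the carry out of the 128-bit subtraction a - b
     * computed as a + ~b + 1: the carry is 1 exactly when a >= b, i.e.
     * when a > b (~a < ~b) or a == b (a + ~b is all ones).
     */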
2340void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2341{
2342#ifdef CONFIG_INT128
2343    r->u128 = (~a->u128 < ~b->u128) ||
2344                 (a->u128 + ~b->u128 == (__uint128_t)-1);
2345#else
2346    int carry = (avr_qw_cmpu(*a, *b) > 0);
2347    if (!carry) {
2348        ppc_avr_t tmp;
2349        avr_qw_not(&tmp, *b);
2350        avr_qw_add(&tmp, *a, tmp);
2351        carry = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2352    }
2353    r->VsrD(0) = 0;
2354    r->VsrD(1) = carry;
2355#endif
2356}
2357
2358void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2359{
2360#ifdef CONFIG_INT128
2361    r->u128 =
2362        (~a->u128 < ~b->u128) ||
2363        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2364#else
2365    int carry_in = c->VsrD(1) & 1;
2366    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2367    if (!carry_out && carry_in) {
2368        ppc_avr_t tmp;
2369        avr_qw_not(&tmp, *b);
2370        avr_qw_add(&tmp, *a, tmp);
2371        carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2372    }
2373
2374    r->VsrD(0) = 0;
2375    r->VsrD(1) = carry_out;
2376#endif
2377}
2378
2379#define BCD_PLUS_PREF_1 0xC
2380#define BCD_PLUS_PREF_2 0xF
2381#define BCD_PLUS_ALT_1  0xA
2382#define BCD_NEG_PREF    0xD
2383#define BCD_NEG_ALT     0xB
2384#define BCD_PLUS_ALT_2  0xE
2385#define NATIONAL_PLUS   0x2B
2386#define NATIONAL_NEG    0x2D
2387
2388#if defined(HOST_WORDS_BIGENDIAN)
2389#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2390#else
2391#define BCD_DIG_BYTE(n) ((n) / 2)
2392#endif
2393
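    /*
     * Packed BCD layout: nibble 0 (the least significant nibble of the
     * register) is the sign, nibbles 1 (least significant digit) through
     * 31 (most significant) are the decimal digits.  Digit n lives in
     * byte BCD_DIG_BYTE(n), in the high half when n is odd.  Sign
     * nibbles 0xA-0xF encode plus or minus as defined above; any digit
     * nibble above 9 makes the operand invalid.
     */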
2394static int bcd_get_sgn(ppc_avr_t *bcd)
2395{
2396    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2397    case BCD_PLUS_PREF_1:
2398    case BCD_PLUS_PREF_2:
2399    case BCD_PLUS_ALT_1:
2400    case BCD_PLUS_ALT_2:
2401    {
2402        return 1;
2403    }
2404
2405    case BCD_NEG_PREF:
2406    case BCD_NEG_ALT:
2407    {
2408        return -1;
2409    }
2410
2411    default:
2412    {
2413        return 0;
2414    }
2415    }
2416}
2417
2418static int bcd_preferred_sgn(int sgn, int ps)
2419{
2420    if (sgn >= 0) {
2421        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2422    } else {
2423        return BCD_NEG_PREF;
2424    }
2425}
2426
2427static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2428{
2429    uint8_t result;
2430    if (n & 1) {
2431        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2432    } else {
2433        result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2434    }
2435
2436    if (unlikely(result > 9)) {
2437        *invalid = true;
2438    }
2439    return result;
2440}
2441
2442static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2443{
2444    if (n & 1) {
2445        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2446        bcd->u8[BCD_DIG_BYTE(n)] |= (digit << 4);
2447    } else {
2448        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2449        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2450    }
2451}
2452
2453static bool bcd_is_valid(ppc_avr_t *bcd)
2454{
2455    int i;
2456    int invalid = 0;
2457
2458    if (bcd_get_sgn(bcd) == 0) {
2459        return false;
2460    }
2461
2462    for (i = 1; i < 32; i++) {
2463        bcd_get_digit(bcd, i, &invalid);
2464        if (unlikely(invalid)) {
2465            return false;
2466        }
2467    }
2468    return true;
2469}
2470
2471static int bcd_cmp_zero(ppc_avr_t *bcd)
2472{
2473    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2474        return CRF_EQ;
2475    } else {
2476        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2477    }
2478}
2479
2480static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2481{
2482    return reg->VsrH(7 - n);
2483}
2484
2485static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2486{
2487    reg->VsrH(7 - n) = val;
2488}
2489
2490static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2491{
2492    int i;
2493    int invalid = 0;
2494    for (i = 31; i > 0; i--) {
2495        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2496        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2497        if (unlikely(invalid)) {
2498            return 0; /* doesn't matter */
2499        } else if (dig_a > dig_b) {
2500            return 1;
2501        } else if (dig_a < dig_b) {
2502            return -1;
2503        }
2504    }
2505
2506    return 0;
2507}
2508
2509static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2510                       int *overflow)
2511{
2512    int carry = 0;
2513    int i;
2514    for (i = 1; i <= 31; i++) {
2515        uint8_t digit = bcd_get_digit(a, i, invalid) +
2516                        bcd_get_digit(b, i, invalid) + carry;
2517        if (digit > 9) {
2518            carry = 1;
2519            digit -= 10;
2520        } else {
2521            carry = 0;
2522        }
2523
2524        bcd_put_digit(t, digit, i);
2525    }
2526
2527    *overflow = carry;
2528}
2529
2530static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2531                       int *overflow)
2532{
2533    int carry = 0;
2534    int i;
2535
2536    for (i = 1; i <= 31; i++) {
2537        uint8_t digit = bcd_get_digit(a, i, invalid) -
2538                        bcd_get_digit(b, i, invalid) + carry;
2539        if (digit & 0x80) {
2540            carry = -1;
2541            digit += 10;
2542        } else {
2543            carry = 0;
2544        }
2545
2546        bcd_put_digit(t, digit, i);
2547    }
2548
2549    *overflow = carry;
2550}
2551
2552uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2553{
2554
2555    int sgna = bcd_get_sgn(a);
2556    int sgnb = bcd_get_sgn(b);
2557    int invalid = (sgna == 0) || (sgnb == 0);
2558    int overflow = 0;
2559    uint32_t cr = 0;
2560    ppc_avr_t result = { .u64 = { 0, 0 } };
2561
2562    if (!invalid) {
2563        if (sgna == sgnb) {
2564            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2565            bcd_add_mag(&result, a, b, &invalid, &overflow);
2566            cr = bcd_cmp_zero(&result);
2567        } else {
2568            int magnitude = bcd_cmp_mag(a, b);
2569            if (magnitude > 0) {
2570                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2571                bcd_sub_mag(&result, a, b, &invalid, &overflow);
2572                cr = (sgna > 0) ? CRF_GT : CRF_LT;
2573            } else if (magnitude < 0) {
2574                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2575                bcd_sub_mag(&result, b, a, &invalid, &overflow);
2576                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2577            } else {
2578                result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(0, ps);
2579                cr = CRF_EQ;
2580            }
2581        }
2582    }
2583
2584    if (unlikely(invalid)) {
2585        result.VsrD(0) = result.VsrD(1) = -1;
2586        cr = CRF_SO;
2587    } else if (overflow) {
2588        cr |= CRF_SO;
2589    }
2590
2591    *r = result;
2592
2593    return cr;
2594}
2595
2596uint32_t helper_bcdsub(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2597{
2598    ppc_avr_t bcopy = *b;
2599    int sgnb = bcd_get_sgn(b);
2600    if (sgnb < 0) {
2601        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2602    } else if (sgnb > 0) {
2603        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2604    }
2605    /* else invalid ... defer to bcdadd code for proper handling */
2606
2607    return helper_bcdadd(r, a, &bcopy, ps);
2608}
2609
2610uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2611{
2612    int i;
2613    int cr = 0;
2614    uint16_t national = 0;
2615    uint16_t sgnb = get_national_digit(b, 0);
2616    ppc_avr_t ret = { .u64 = { 0, 0 } };
2617    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2618
2619    for (i = 1; i < 8; i++) {
2620        national = get_national_digit(b, i);
2621        if (unlikely(national < 0x30 || national > 0x39)) {
2622            invalid = 1;
2623            break;
2624        }
2625
2626        bcd_put_digit(&ret, national & 0xf, i);
2627    }
2628
2629    if (sgnb == NATIONAL_PLUS) {
2630        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2631    } else {
2632        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2633    }
2634
2635    cr = bcd_cmp_zero(&ret);
2636
2637    if (unlikely(invalid)) {
2638        cr = CRF_SO;
2639    }
2640
2641    *r = ret;
2642
2643    return cr;
2644}
2645
2646uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2647{
2648    int i;
2649    int cr = 0;
2650    int sgnb = bcd_get_sgn(b);
2651    int invalid = (sgnb == 0);
2652    ppc_avr_t ret = { .u64 = { 0, 0 } };
2653
2654    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2655
2656    for (i = 1; i < 8; i++) {
2657        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2658
2659        if (unlikely(invalid)) {
2660            break;
2661        }
2662    }
2663    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2664
2665    cr = bcd_cmp_zero(b);
2666
2667    if (ox_flag) {
2668        cr |= CRF_SO;
2669    }
2670
2671    if (unlikely(invalid)) {
2672        cr = CRF_SO;
2673    }
2674
2675    *r = ret;
2676
2677    return cr;
2678}
2679
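    /*
     * Zoned decimal: one digit per byte, least significant digit in the
     * rightmost byte, each low nibble holding the digit and each high
     * nibble the zone (0x3 or 0xF depending on ps); the sign rides in
     * the zone nibble of the rightmost byte.  bcdcfz and bcdctz below
     * convert from and to packed BCD.
     */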
2680uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2681{
2682    int i;
2683    int cr = 0;
2684    int invalid = 0;
2685    int zone_digit = 0;
2686    int zone_lead = ps ? 0xF : 0x3;
2687    int digit = 0;
2688    ppc_avr_t ret = { .u64 = { 0, 0 } };
2689    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;
2690
2691    if (unlikely((sgnb < 0xA) && ps)) {
2692        invalid = 1;
2693    }
2694
2695    for (i = 0; i < 16; i++) {
2696        zone_digit = i ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
2697        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
2698        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2699            invalid = 1;
2700            break;
2701        }
2702
2703        bcd_put_digit(&ret, digit, i + 1);
2704    }
2705
2706    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2707            (!ps && (sgnb & 0x4))) {
2708        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2709    } else {
2710        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2711    }
2712
2713    cr = bcd_cmp_zero(&ret);
2714
2715    if (unlikely(invalid)) {
2716        cr = CRF_SO;
2717    }
2718
2719    *r = ret;
2720
2721    return cr;
2722}
2723
2724uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2725{
2726    int i;
2727    int cr = 0;
2728    uint8_t digit = 0;
2729    int sgnb = bcd_get_sgn(b);
2730    int zone_lead = (ps) ? 0xF0 : 0x30;
2731    int invalid = (sgnb == 0);
2732    ppc_avr_t ret = { .u64 = { 0, 0 } };
2733
2734    int ox_flag = ((b->VsrD(0) >> 4) != 0);
2735
2736    for (i = 0; i < 16; i++) {
2737        digit = bcd_get_digit(b, i + 1, &invalid);
2738
2739        if (unlikely(invalid)) {
2740            break;
2741        }
2742
2743        ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
2744    }
2745
2746    if (ps) {
2747        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2748    } else {
2749        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2750    }
2751
2752    cr = bcd_cmp_zero(b);
2753
2754    if (ox_flag) {
2755        cr |= CRF_SO;
2756    }
2757
2758    if (unlikely(invalid)) {
2759        cr = CRF_SO;
2760    }
2761
2762    *r = ret;
2763
2764    return cr;
2765}
2766
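    /*
     * bcdcfsq converts a signed 128-bit integer to packed BCD: divu128
     * splits the magnitude around 10^15, the remainder supplying digits
     * 1-15 and the quotient digits 16-31.  A magnitude of 32 or more
     * decimal digits sets CR.SO (overflow).
     */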
2767uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2768{
2769    int i;
2770    int cr = 0;
2771    uint64_t lo_value;
2772    uint64_t hi_value;
2773    ppc_avr_t ret = { .u64 = { 0, 0 } };
2774
2775    if (b->VsrSD(0) < 0) {
2776        lo_value = -b->VsrSD(1);
2777        hi_value = ~b->VsrD(0) + !lo_value;
2778        bcd_put_digit(&ret, 0xD, 0);
2779    } else {
2780        lo_value = b->VsrD(1);
2781        hi_value = b->VsrD(0);
2782        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2783    }
2784
2785    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
2786            lo_value > 9999999999999999ULL) {
2787        cr = CRF_SO;
2788    }
2789
2790    for (i = 1; i < 16; hi_value /= 10, i++) {
2791        bcd_put_digit(&ret, hi_value % 10, i);
2792    }
2793
2794    for (; i < 32; lo_value /= 10, i++) {
2795        bcd_put_digit(&ret, lo_value % 10, i);
2796    }
2797
2798    cr |= bcd_cmp_zero(&ret);
2799
2800    *r = ret;
2801
2802    return cr;
2803}
2804
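    /*
     * bcdctsq converts packed BCD to a signed 128-bit integer by Horner
     * evaluation from the most significant digit down: each step scales
     * the 128-bit accumulator by 10 (mulu64 provides the cross carry)
     * and adds the next digit; the sign nibble selects a final negation.
     */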
2805uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2806{
2807    uint8_t i;
2808    int cr;
2809    uint64_t carry;
2810    uint64_t unused;
2811    uint64_t lo_value;
2812    uint64_t hi_value = 0;
2813    int sgnb = bcd_get_sgn(b);
2814    int invalid = (sgnb == 0);
2815
2816    lo_value = bcd_get_digit(b, 31, &invalid);
2817    for (i = 30; i > 0; i--) {
2818        mulu64(&lo_value, &carry, lo_value, 10ULL);
2819        mulu64(&hi_value, &unused, hi_value, 10ULL);
2820        lo_value += bcd_get_digit(b, i, &invalid);
2821        hi_value += carry;
2822
2823        if (unlikely(invalid)) {
2824            break;
2825        }
2826    }
2827
2828    if (sgnb == -1) {
2829        r->VsrSD(1) = -lo_value;
2830        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2831    } else {
2832        r->VsrSD(1) = lo_value;
2833        r->VsrSD(0) = hi_value;
2834    }
2835
2836    cr = bcd_cmp_zero(b);
2837
2838    if (unlikely(invalid)) {
2839        cr = CRF_SO;
2840    }
2841
2842    return cr;
2843}
2844
2845uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2846{
2847    int i;
2848    int invalid = 0;
2849
2850    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2851        return CRF_SO;
2852    }
2853
2854    *r = *a;
2855    bcd_put_digit(r, b->u8[BCD_DIG_BYTE(0)] & 0xF, 0);
2856
2857    for (i = 1; i < 32; i++) {
2858        bcd_get_digit(a, i, &invalid);
2859        bcd_get_digit(b, i, &invalid);
2860        if (unlikely(invalid)) {
2861            return CRF_SO;
2862        }
2863    }
2864
2865    return bcd_cmp_zero(r);
2866}
2867
2868uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2869{
2870    int sgnb = bcd_get_sgn(b);
2871
2872    *r = *b;
2873    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2874
2875    if (bcd_is_valid(b) == false) {
2876        return CRF_SO;
2877    }
2878
2879    return bcd_cmp_zero(r);
2880}
2881
2882uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2883{
2884    int cr;
2885#if defined(HOST_WORDS_BIGENDIAN)
2886    int i = a->s8[7];
2887#else
2888    int i = a->s8[8];
2889#endif
2890    bool ox_flag = false;
2891    int sgnb = bcd_get_sgn(b);
2892    ppc_avr_t ret = *b;
2893    ret.VsrD(1) &= ~0xf;
2894
2895    if (bcd_is_valid(b) == false) {
2896        return CRF_SO;
2897    }
2898
2899    if (unlikely(i > 31)) {
2900        i = 31;
2901    } else if (unlikely(i < -31)) {
2902        i = -31;
2903    }
2904
2905    if (i > 0) {
2906        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2907    } else {
2908        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2909    }
2910    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2911
2912    *r = ret;
2913
2914    cr = bcd_cmp_zero(r);
2915    if (ox_flag) {
2916        cr |= CRF_SO;
2917    }
2918
2919    return cr;
2920}
2921
2922uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2923{
2924    int cr;
2925    int i;
2926    int invalid = 0;
2927    bool ox_flag = false;
2928    ppc_avr_t ret = *b;
2929
2930    for (i = 0; i < 32; i++) {
2931        bcd_get_digit(b, i, &invalid);
2932
2933        if (unlikely(invalid)) {
2934            return CRF_SO;
2935        }
2936    }
2937
2938#if defined(HOST_WORDS_BIGENDIAN)
2939    i = a->s8[7];
2940#else
2941    i = a->s8[8];
2942#endif
2943    if (i >= 32) {
2944        ox_flag = true;
2945        ret.VsrD(1) = ret.VsrD(0) = 0;
2946    } else if (i <= -32) {
2947        ret.VsrD(1) = ret.VsrD(0) = 0;
2948    } else if (i > 0) {
2949        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2950    } else {
2951        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2952    }
2953    *r = ret;
2954
2955    cr = bcd_cmp_zero(r);
2956    if (ox_flag) {
2957        cr |= CRF_SO;
2958    }
2959
2960    return cr;
2961}
2962
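    /*
     * bcdsr shifts by a signed digit count and rounds: after a right
     * shift the last digit shifted out lands in the (cleared) sign
     * nibble, and if it is 5 or more the magnitude is incremented by one
     * before the sign is rewritten.
     */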
2963uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2964{
2965    int cr;
2966    int unused = 0;
2967    int invalid = 0;
2968    bool ox_flag = false;
2969    int sgnb = bcd_get_sgn(b);
2970    ppc_avr_t ret = *b;
2971    ret.VsrD(1) &= ~0xf;
2972
2973#if defined(HOST_WORDS_BIGENDIAN)
2974    int i = a->s8[7];
2975    ppc_avr_t bcd_one = { .u64 = { 0, 0x10 } };
2976#else
2977    int i = a->s8[8];
2978    ppc_avr_t bcd_one = { .u64 = { 0x10, 0 } };
2979#endif
2980
2981    if (bcd_is_valid(b) == false) {
2982        return CRF_SO;
2983    }
2984
2985    if (unlikely(i > 31)) {
2986        i = 31;
2987    } else if (unlikely(i < -31)) {
2988        i = -31;
2989    }
2990
2991    if (i > 0) {
2992        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2993    } else {
2994        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2995
2996        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2997            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2998        }
2999    }
3000    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
3001
3002    cr = bcd_cmp_zero(&ret);
3003    if (ox_flag) {
3004        cr |= CRF_SO;
3005    }
3006    *r = ret;
3007
3008    return cr;
3009}
3010
3011uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3012{
3013    uint64_t mask;
3014    uint32_t ox_flag = 0;
3015#if defined(HOST_WORDS_BIGENDIAN)
3016    int i = a->s16[3] + 1;
3017#else
3018    int i = a->s16[4] + 1;
3019#endif
3020    ppc_avr_t ret = *b;
3021
3022    if (bcd_is_valid(b) == false) {
3023        return CRF_SO;
3024    }
3025
3026    if (i > 16 && i < 32) {
3027        mask = (uint64_t)-1 >> (128 - i * 4);
3028        if (ret.VsrD(0) & ~mask) {
3029            ox_flag = CRF_SO;
3030        }
3031
3032        ret.VsrD(0) &= mask;
3033    } else if (i >= 0 && i <= 16) {
3034        mask = (uint64_t)-1 >> (64 - i * 4);
3035        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
3036            ox_flag = CRF_SO;
3037        }
3038
3039        ret.VsrD(1) &= mask;
3040        ret.VsrD(0) = 0;
3041    }
3042    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
3043    *r = ret;
3044
3045    return bcd_cmp_zero(&ret) | ox_flag;
3046}
3047
3048uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
3049{
3050    int i;
3051    uint64_t mask;
3052    uint32_t ox_flag = 0;
3053    int invalid = 0;
3054    ppc_avr_t ret = *b;
3055
3056    for (i = 0; i < 32; i++) {
3057        bcd_get_digit(b, i, &invalid);
3058
3059        if (unlikely(invalid)) {
3060            return CRF_SO;
3061        }
3062    }
3063
3064#if defined(HOST_WORDS_BIGENDIAN)
3065    i = a->s16[3];
3066#else
3067    i = a->s16[4];
3068#endif
3069    if (i > 16 && i < 33) {
3070        mask = (uint64_t)-1 >> (128 - i * 4);
3071        if (ret.VsrD(0) & ~mask) {
3072            ox_flag = CRF_SO;
3073        }
3074
3075        ret.VsrD(0) &= mask;
3076    } else if (i > 0 && i <= 16) {
3077        mask = (uint64_t)-1 >> (64 - i * 4);
3078        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
3079            ox_flag = CRF_SO;
3080        }
3081
3082        ret.VsrD(1) &= mask;
3083        ret.VsrD(0) = 0;
3084    } else if (i == 0) {
3085        if (ret.VsrD(0) || ret.VsrD(1)) {
3086            ox_flag = CRF_SO;
3087        }
3088        ret.VsrD(0) = ret.VsrD(1) = 0;
3089    }
3090
3091    *r = ret;
3092    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
3093        return ox_flag | CRF_EQ;
3094    }
3095
3096    return ox_flag | CRF_GT;
3097}
3098
3099void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
3100{
3101    int i;
3102    VECTOR_FOR_INORDER_I(i, u8) {
3103        r->u8[i] = AES_sbox[a->u8[i]];
3104    }
3105}
3106
3107void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3108{
3109    ppc_avr_t result;
3110    int i;
3111
3112    VECTOR_FOR_INORDER_I(i, u32) {
3113        result.VsrW(i) = b->VsrW(i) ^
3114            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
3115             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
3116             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
3117             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
3118    }
3119    *r = result;
3120}
3121
3122void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3123{
3124    ppc_avr_t result;
3125    int i;
3126
3127    VECTOR_FOR_INORDER_I(i, u8) {
3128        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
3129    }
3130    *r = result;
3131}
3132
3133void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3134{
3135    /* This differs from what is written in ISA V2.07.  The RTL is */
3136    /* incorrect and will be fixed in V2.07B.                      */
3137    int i;
3138    ppc_avr_t tmp;
3139
3140    VECTOR_FOR_INORDER_I(i, u8) {
3141        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
3142    }
3143
3144    VECTOR_FOR_INORDER_I(i, u32) {
3145        r->VsrW(i) =
3146            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
3147            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
3148            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
3149            AES_imc[tmp.VsrB(4 * i + 3)][3];
3150    }
3151}
3152
3153void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
3154{
3155    ppc_avr_t result;
3156    int i;
3157
3158    VECTOR_FOR_INORDER_I(i, u8) {
3159        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
3160    }
3161    *r = result;
3162}
3163
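    /*
     * vshasigmaw computes the four SHA-256 sigma functions, selected per
     * word by st (lower- vs upper-case sigma) and the word's bit of six:
     *   sigma0 = ror7 ^ ror18 ^ shr3       sigma1 = ror17 ^ ror19 ^ shr10
     *   Sigma0 = ror2 ^ ror13 ^ ror22      Sigma1 = ror6 ^ ror11 ^ ror25
     * vshasigmad below is the SHA-512 analogue on doublewords.
     */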
3164void helper_vshasigmaw(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
3165{
3166    int st = (st_six & 0x10) != 0;
3167    int six = st_six & 0xF;
3168    int i;
3169
3170    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
3171        if (st == 0) {
3172            if ((six & (0x8 >> i)) == 0) {
3173                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
3174                             ror32(a->VsrW(i), 18) ^
3175                             (a->VsrW(i) >> 3);
3176            } else { /* six.bit[i] == 1 */
3177                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
3178                             ror32(a->VsrW(i), 19) ^
3179                             (a->VsrW(i) >> 10);
3180            }
3181        } else { /* st == 1 */
3182            if ((six & (0x8 >> i)) == 0) {
3183                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
3184                             ror32(a->VsrW(i), 13) ^
3185                             ror32(a->VsrW(i), 22);
3186            } else { /* six.bit[i] == 1 */
3187                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
3188                             ror32(a->VsrW(i), 11) ^
3189                             ror32(a->VsrW(i), 25);
3190            }
3191        }
3192    }
3193}
3194
3195void helper_vshasigmad(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
3196{
3197    int st = (st_six & 0x10) != 0;
3198    int six = st_six & 0xF;
3199    int i;
3200
3201    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
3202        if (st == 0) {
3203            if ((six & (0x8 >> (2 * i))) == 0) {
3204                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
3205                             ror64(a->VsrD(i), 8) ^
3206                             (a->VsrD(i) >> 7);
3207            } else { /* six.bit[2*i] == 1 */
3208                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
3209                             ror64(a->VsrD(i), 61) ^
3210                             (a->VsrD(i) >> 6);
3211            }
3212        } else { /* st == 1 */
3213            if ((six & (0x8 >> (2 * i))) == 0) {
3214                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
3215                             ror64(a->VsrD(i), 34) ^
3216                             ror64(a->VsrD(i), 39);
3217            } else { /* six.bit[2*i] == 1 */
3218                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
3219                             ror64(a->VsrD(i), 18) ^
3220                             ror64(a->VsrD(i), 41);
3221            }
3222        }
3223    }
3224}
3225
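    /*
     * vpermxor: for each result byte, the two nibbles of the control
     * byte in c select one byte of a (high nibble) and one byte of b
     * (low nibble) to XOR together; e.g. a control byte of 0x4A yields
     * a[4] ^ b[10].
     */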
3226void helper_vpermxor(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3227{
3228    ppc_avr_t result;
3229    int i;
3230
3231    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
3232        int indexA = c->VsrB(i) >> 4;
3233        int indexB = c->VsrB(i) & 0xF;
3234
3235        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
3236    }
3237    *r = result;
3238}
3239
3240#undef VECTOR_FOR_INORDER_I
3241
3242/*****************************************************************************/
3243/* SPE extension helpers */
3244/* Use a table to make this quicker */
3245static const uint8_t hbrev[16] = {
3246    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3247    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3248};
3249
3250static inline uint8_t byte_reverse(uint8_t val)
3251{
3252    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3253}
3254
3255static inline uint32_t word_reverse(uint32_t val)
3256{
3257    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3258        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3259}
3260
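    /*
     * brinc performs the bit-reversed increment used for FFT-style
     * addressing: the bits of arg1 selected by the mask in arg2 are
     * incremented as if their bit order were reversed (reverse, add one,
     * reverse back), and all other bits of arg1 are left untouched.
     */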
3261#define MASKBITS 16 /* Arbitrary value; the real width is implementation dependent */
3262target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3263{
3264    uint32_t a, b, d, mask;
3265
3266    mask = UINT32_MAX >> (32 - MASKBITS);
3267    a = arg1 & mask;
3268    b = arg2 & mask;
3269    d = word_reverse(1 + word_reverse(a | ~b));
3270    return (arg1 & ~mask) | (d & b);
3271}
3272
3273uint32_t helper_cntlsw32(uint32_t val)
3274{
3275    if (val & 0x80000000) {
3276        return clz32(~val);
3277    } else {
3278        return clz32(val);
3279    }
3280}
3281
3282uint32_t helper_cntlzw32(uint32_t val)
3283{
3284    return clz32(val);
3285}
3286
3287/* 440 specific */
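    /*
     * dlmzb (Determine Leftmost Zero Byte) scans the eight bytes of
     * high:low and writes the 1-based index of the first zero byte (8 if
     * there is none) into the low seven bits of XER; with update_Rc set,
     * CR0 also reports whether the zero byte was found in high, in low,
     * or not at all.
     */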
3288target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3289                          target_ulong low, uint32_t update_Rc)
3290{
3291    target_ulong mask;
3292    int i;
3293
3294    i = 1;
3295    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3296        if ((high & mask) == 0) {
3297            if (update_Rc) {
3298                env->crf[0] = 0x4;
3299            }
3300            goto done;
3301        }
3302        i++;
3303    }
3304    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3305        if ((low & mask) == 0) {
3306            if (update_Rc) {
3307                env->crf[0] = 0x8;
3308            }
3309            goto done;
3310        }
3311        i++;
3312    }
3313    i = 8;
3314    if (update_Rc) {
3315        env->crf[0] = 0x2;
3316    }
3317 done:
3318    env->xer = (env->xer & ~0x7F) | i;
3319    if (update_Rc) {
3320        env->crf[0] |= xer_so;
3321    }
3322    return i;
3323}
3324