qemu/target/ppc/int_helper.c
/*
 *  PowerPC integer and vector emulation helpers for QEMU.
 *
 *  Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "qemu/host-utils.h"
#include "qemu/main-loop.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"
#include "fpu/softfloat.h"
#include "qapi/error.h"
#include "qemu/guest-random.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

static inline void helper_update_ov_legacy(CPUPPCState *env, int ov)
{
    if (unlikely(ov)) {
        env->so = env->ov = 1;
    } else {
        env->ov = 0;
    }
}

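/*
 * divweu/divwe: extended word divide. The low 32 bits of ra form the
 * high half of the dividend (ra << 32), which is divided by the low
 * 32 bits of rb. A zero divisor or a quotient that does not fit in
 * 32 bits is treated as overflow: the result is undefined (0 here)
 * and, when OE is set, OV/SO are updated.
 */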
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        helper_update_ov_legacy(env, overflow);
    }

    return rt;
}

#endif


#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/*
 * Subtract 1 from each byte, AND with the inverse, and check whether the
 * MSB is set in each byte.
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the pattern and there is a match, that byte will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))

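/*
 * cmpeqb: return CRF_GT if any byte of rb equals the low byte of ra,
 * otherwise 0; e.g. ra = 0x2d, rb = 0x112d334455667788 yields CRF_GT
 * because the second byte of rb matches.
 */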
uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

/*
 * Return a random number.
 */
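/*
 * On failure both helpers return -1 (all ones), the value the darn
 * instruction uses to report an invalid result.
 */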
uint64_t helper_darn32(void)
{
    Error *err = NULL;
    uint32_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

uint64_t helper_darn64(void)
{
    Error *err = NULL;
    uint64_t ret;

    if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) {
        qemu_log_mask(LOG_UNIMP, "darn: Crypto failure: %s",
                      error_get_pretty(err));
        error_free(err);
        return -1;
    }

    return ret;
}

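/*
 * bpermd: each of the 8 bytes of rs selects a bit of rb (big-endian bit
 * numbering via PPC_BIT); indices >= 64 select 0. The gathered bits form
 * the low byte of the result.
 */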
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & PPC_BIT(index)) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

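/*
 * cmpb: set each byte of the result to all ones where the corresponding
 * bytes of rs and rb are equal, and to zero where they differ.
 */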
target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca32 = env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca32 = env->ca = 0;
            } else {
                env->ca32 = env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca32 = env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca32 = env->ca = (ret != 0);
    }
    return ret;
}
#endif

#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    /* Note that we don't fold past words.  */
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    /* Note that we don't fold past bytes */
    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return  tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the most crazy instruction ever seen, imho ! */
/* Real implementation uses a ROM table. Do the same */
/*
 * Extremely decomposed:
 *                      -arg / 256
 * return 256 * log10(10           + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.inc.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
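/*
 * VECTOR_FOR_INORDER_I iterates over vector elements from index 0 upward
 * on big-endian hosts and from the top index downward on little-endian
 * hosts.
 */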
#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element) - 1; index >= 0; index--)
#endif

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

void helper_mtvscr(CPUPPCState *env, uint32_t vscr)
{
    env->vscr = vscr & ~(1u << VSCR_SAT);
    /* Which bit we set is completely arbitrary, but clear the rest.  */
    env->vscr_sat.u64[0] = vscr & (1u << VSCR_SAT);
    env->vscr_sat.u64[1] = 0;
    set_flush_to_zero((vscr >> VSCR_NJ) & 1, &env->vec_status);
}

uint32_t helper_mfvscr(CPUPPCState *env)
{
    uint32_t sat = (env->vscr_sat.u64[0] | env->vscr_sat.u64[1]) != 0;
    return env->vscr | (sat << VSCR_SAT);
}

static inline void set_vscr_sat(CPUPPCState *env)
{
    /* The choice of non-zero value is arbitrary.  */
    env->vscr_sat.u32[0] = 1;
}

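/*
 * vaddcuw: write the carry out of each 32-bit unsigned add of a and b;
 * ~a[i] < b[i] is true exactly when a[i] + b[i] overflows 32 bits.
 */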
void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->VsrD(1) = res & 1;
    r->VsrD(0) = 0;
}

#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = func(a->f32[i], b->f32[i], &env->vec_status);   \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                           ppc_avr_t *b, ppc_avr_t *c)                  \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            r->f32[i] = float32_muladd(a->f32[i], c->f32[i], b->f32[i], \
                                       type, &env->vec_status);         \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *vscr_sat,              \
                        ppc_avr_t *a, ppc_avr_t *b, uint32_t desc)      \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            VARITHSAT_CASE(optype, op, cvt, element);                   \
        }                                                               \
        if (sat) {                                                      \
            vscr_sat->u32[0] = 1;                                       \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
            (a->element[i] - b->element[i]) :                           \
            (b->element[i] - a->element[i]);                            \
    }                                                                   \
}

/*
 * VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f32[i] = float32_scalbn(t, -uim, &env->vec_status);      \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP

#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,              \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
{                                                                       \
    etype ones = (etype)-1;                                             \
    etype all = ones;                                                   \
    etype result, none = 0;                                             \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        if (cmpzero) {                                                  \
            result = ((a->element[i] == 0)                              \
                           || (b->element[i] == 0)                      \
                           || (a->element[i] != b->element[i]) ?        \
                           ones : 0x0);                                 \
        } else {                                                        \
            result = (a->element[i] != b->element[i]) ? ones : 0x0;     \
        }                                                               \
        r->element[i] = result;                                         \
        all &= result;                                                  \
        none |= result;                                                 \
    }                                                                   \
    if (record) {                                                       \
        env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);           \
    }                                                                   \
}

/*
 * VCMPNEZ - Vector compare not equal to zero
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f32[i], b->f32[i],       \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
        int le_rel = float32_compare_quiet(a->f32[i], b->f32[i],
                                           &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f32[i]);
            int ge_rel = float32_compare_quiet(a->f32[i], bneg,
                                               &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {                      \
            if (float32_is_any_nan(b->f32[i])) {                        \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f32[i], &s);          \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
        if (r->VsrB(i) & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

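/*
 * vmrgh*/vmrgl*: interleave elements of a and b, taking them from the
 * high half (ofs 0) or the low half (ofs half) of each source,
 * alternating a then b.
 */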
#define VMRG_DO(name, element, access, ofs)                                  \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)            \
    {                                                                        \
        ppc_avr_t result;                                                    \
        int i, half = ARRAY_SIZE(r->element) / 2;                            \
                                                                             \
        for (i = 0; i < half; i++) {                                         \
            result.access(i * 2 + 0) = a->access(i + ofs);                   \
            result.access(i * 2 + 1) = b->access(i + ofs);                   \
        }                                                                    \
        *r = result;                                                         \
    }

#define VMRG(suffix, element, access)          \
    VMRG_DO(mrgl##suffix, element, access, half)   \
    VMRG_DO(mrgh##suffix, element, access, 0)
VMRG(b, u8, VsrB)
VMRG(h, u16, VsrH)
VMRG(w, u32, VsrW)
#undef VMRG_DO
#undef VMRG

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        set_vscr_sat(env);
    }
}

#define VMUL_DO_EVN(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i) *           \
                                     (cast)b->mul_access(i);            \
        }                                                               \
    }

#define VMUL_DO_ODD(name, mul_element, mul_access, prod_access, cast)   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->mul_element); i += 2) {           \
            r->prod_access(i >> 1) = (cast)a->mul_access(i + 1) *       \
                                     (cast)b->mul_access(i + 1);        \
        }                                                               \
    }

#define VMUL(suffix, mul_element, mul_access, prod_access, cast)       \
    VMUL_DO_EVN(mule##suffix, mul_element, mul_access, prod_access, cast)  \
    VMUL_DO_ODD(mulo##suffix, mul_element, mul_access, prod_access, cast)
VMUL(sb, s8, VsrSB, VsrSH, int16_t)
VMUL(sh, s16, VsrSH, VsrSW, int32_t)
VMUL(sw, s32, VsrSW, VsrSD, int64_t)
VMUL(ub, u8, VsrB, VsrH, uint16_t)
VMUL(uh, u16, VsrH, VsrW, uint32_t)
VMUL(uw, u32, VsrW, VsrD, uint64_t)
#undef VMUL_DO_EVN
#undef VMUL_DO_ODD
#undef VMUL

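/*
 * vperm: each byte of the result is selected from the 32-byte
 * concatenation of a and b by the low 5 bits of the corresponding byte
 * of c; vpermr below indexes from the other end of each source.
 */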
void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = s & 0xf;

        if (s & 0x10) {
            result.VsrB(i) = b->VsrB(index);
        } else {
            result.VsrB(i) = a->VsrB(index);
        }
    }
    *r = result;
}

void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        int s = c->VsrB(i) & 0x1f;
        int index = 15 - (s & 0xf);

        if (s & 0x10) {
            result.VsrB(i) = a->VsrB(index);
        } else {
            result.VsrB(i) = b->VsrB(index);
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15 - (i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

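/*
 * vbpermd/vbpermq: the bytes of b are bit indices into a (per doubleword
 * for vbpermd, into the full 128 bits for vbpermq); the selected bits
 * are gathered into the result, with out-of-range indices contributing 0.
 */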
void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->VsrD(0) = perm;
    r->VsrD(1) = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

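/*
 * vpmsum{b,h,w}: carry-less (polynomial, XOR-based) multiply of each
 * pair of corresponding elements, then XOR adjacent products into the
 * double-width result element.
 */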
#define PMSUM(name, srcfld, trgfld, trgtyp)                   \
void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
{                                                             \
    int i, j;                                                 \
    trgtyp prod[sizeof(ppc_avr_t) / sizeof(a->srcfld[0])];    \
                                                              \
    VECTOR_FOR_INORDER_I(i, srcfld) {                         \
        prod[i] = 0;                                          \
        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
            if (a->srcfld[i] & (1ull << j)) {                 \
                prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
            }                                                 \
        }                                                     \
    }                                                         \
                                                              \
    VECTOR_FOR_INORDER_I(i, trgfld) {                         \
        r->trgfld[i] = prod[2 * i] ^ prod[2 * i + 1];         \
    }                                                         \
}

PMSUM(vpmsumb, u8, u16, uint16_t)
PMSUM(vpmsumh, u16, u32, uint32_t)
PMSUM(vpmsumw, u32, u64, uint64_t)

void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{

#ifdef CONFIG_INT128
    int i, j;
    __uint128_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i] = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
            }
        }
    }

    r->u128 = prod[0] ^ prod[1];

#else
    int i, j;
    ppc_avr_t prod[2];

    VECTOR_FOR_INORDER_I(i, u64) {
        prod[i].VsrD(1) = prod[i].VsrD(0) = 0;
        for (j = 0; j < 64; j++) {
            if (a->u64[i] & (1ull << j)) {
                ppc_avr_t bshift;
                if (j == 0) {
                    bshift.VsrD(0) = 0;
                    bshift.VsrD(1) = b->u64[i];
                } else {
                    bshift.VsrD(0) = b->u64[i] >> (64 - j);
                    bshift.VsrD(1) = b->u64[i] << j;
                }
                prod[i].VsrD(1) ^= bshift.VsrD(1);
                prod[i].VsrD(0) ^= bshift.VsrD(0);
            }
        }
    }

    r->VsrD(1) = prod[0].VsrD(1) ^ prod[1].VsrD(1);
    r->VsrD(0) = prod[0].VsrD(0) ^ prod[1].VsrD(0);
#endif
}


#if defined(HOST_WORDS_BIGENDIAN)
#define PKBIG 1
#else
#define PKBIG 0
#endif
void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result;
#if defined(HOST_WORDS_BIGENDIAN)
    const ppc_avr_t *x[2] = { a, b };
#else
    const ppc_avr_t *x[2] = { b, a };
#endif

    VECTOR_FOR_INORDER_I(i, u64) {
        VECTOR_FOR_INORDER_I(j, u32) {
            uint32_t e = x[i]->u32[j];

            result.u16[4 * i + j] = (((e >> 9) & 0xfc00) |
                                     ((e >> 6) & 0x3e0) |
                                     ((e >> 3) & 0x1f));
        }
    }
    *r = result;
}

#define VPK(suffix, from, to, cvt, dosat)                               \
    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *a, ppc_avr_t *b)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        ppc_avr_t result;                                               \
        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
                                                                        \
        VECTOR_FOR_INORDER_I(i, from) {                                 \
            result.to[i] = cvt(a0->from[i], &sat);                      \
            result.to[i + ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);\
        }                                                               \
        *r = result;                                                    \
        if (dosat && sat) {                                             \
            set_vscr_sat(env);                                          \
        }                                                               \
    }
#define I(x, y) (x)
VPK(shss, s16, s8, cvtshsb, 1)
VPK(shus, s16, u8, cvtshub, 1)
VPK(swss, s32, s16, cvtswsh, 1)
VPK(swus, s32, u16, cvtswuh, 1)
VPK(sdss, s64, s32, cvtsdsw, 1)
VPK(sdus, s64, u32, cvtsduw, 1)
VPK(uhus, u16, u8, cvtuhub, 1)
VPK(uwus, u32, u16, cvtuwuh, 1)
VPK(udus, u64, u32, cvtuduw, 1)
VPK(uhum, u16, u8, I, 0)
VPK(uwum, u32, u16, I, 0)
VPK(udum, u64, u32, I, 0)
#undef I
#undef VPK
#undef PKBIG

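/*
 * vrefp: per-element reciprocal estimate, implemented here as an exact
 * 1.0 / x division rather than a reduced-precision estimate.
 */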
1323void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1324{
1325    int i;
1326
1327    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1328        r->f32[i] = float32_div(float32_one, b->f32[i], &env->vec_status);
1329    }
1330}
1331
1332#define VRFI(suffix, rounding)                                  \
1333    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
1334                             ppc_avr_t *b)                      \
1335    {                                                           \
1336        int i;                                                  \
1337        float_status s = env->vec_status;                       \
1338                                                                \
1339        set_float_rounding_mode(rounding, &s);                  \
1340        for (i = 0; i < ARRAY_SIZE(r->f32); i++) {              \
1341            r->f32[i] = float32_round_to_int (b->f32[i], &s);   \
1342        }                                                       \
1343    }
1344VRFI(n, float_round_nearest_even)
1345VRFI(m, float_round_down)
1346VRFI(p, float_round_up)
1347VRFI(z, float_round_to_zero)
1348#undef VRFI
1349
1350#define VROTATE(suffix, element, mask)                                  \
1351    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1352    {                                                                   \
1353        int i;                                                          \
1354                                                                        \
1355        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1356            unsigned int shift = b->element[i] & mask;                  \
1357            r->element[i] = (a->element[i] << shift) |                  \
1358                (a->element[i] >> (sizeof(a->element[0]) * 8 - shift)); \
1359        }                                                               \
1360    }
1361VROTATE(b, u8, 0x7)
1362VROTATE(h, u16, 0xF)
1363VROTATE(w, u32, 0x1F)
1364VROTATE(d, u64, 0x3F)
1365#undef VROTATE
1366
1367void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1368{
1369    int i;
1370
1371    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1372        float32 t = float32_sqrt(b->f32[i], &env->vec_status);
1373
1374        r->f32[i] = float32_div(float32_one, t, &env->vec_status);
1375    }
1376}
1377
1378#define VRLMI(name, size, element, insert)                            \
1379void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)          \
1380{                                                                     \
1381    int i;                                                            \
1382    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
1383        uint##size##_t src1 = a->element[i];                          \
1384        uint##size##_t src2 = b->element[i];                          \
1385        uint##size##_t src3 = r->element[i];                          \
1386        uint##size##_t begin, end, shift, mask, rot_val;              \
1387                                                                      \
1388        shift = extract##size(src2, 0, 6);                            \
1389        end   = extract##size(src2, 8, 6);                            \
1390        begin = extract##size(src2, 16, 6);                           \
1391        rot_val = rol##size(src1, shift);                             \
1392        mask = mask_u##size(begin, end);                              \
1393        if (insert) {                                                 \
1394            r->element[i] = (rot_val & mask) | (src3 & ~mask);        \
1395        } else {                                                      \
1396            r->element[i] = (rot_val & mask);                         \
1397        }                                                             \
1398    }                                                                 \
1399}
1400
1401VRLMI(vrldmi, 64, u64, 1);
1402VRLMI(vrlwmi, 32, u32, 1);
1403VRLMI(vrldnm, 64, u64, 0);
1404VRLMI(vrlwnm, 32, u32, 0);
1405
1406void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1407                 ppc_avr_t *c)
1408{
1409    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1410    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1411}
1412
1413void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1414{
1415    int i;
1416
1417    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1418        r->f32[i] = float32_exp2(b->f32[i], &env->vec_status);
1419    }
1420}
1421
1422void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1423{
1424    int i;
1425
1426    for (i = 0; i < ARRAY_SIZE(r->f32); i++) {
1427        r->f32[i] = float32_log2(b->f32[i], &env->vec_status);
1428    }
1429}
1430
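/*
 * vextu[bhw][lr]x: extract an unsigned byte/halfword/word from b at the
 * byte offset held in the low nibble of ra, counted from the left end of
 * the vector for the "lx" forms and from the right end for the "rx" forms.
 */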
1431#if defined(HOST_WORDS_BIGENDIAN)
1432#define VEXTU_X_DO(name, size, left)                                \
1433    target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
1434    {                                                               \
1435        int index;                                                  \
1436        if (left) {                                                 \
1437            index = (a & 0xf) * 8;                                  \
1438        } else {                                                    \
1439            index = ((15 - (a & 0xf) + 1) * 8) - size;              \
1440        }                                                           \
1441        return int128_getlo(int128_rshift(b->s128, index)) &        \
1442            MAKE_64BIT_MASK(0, size);                               \
1443    }
1444#else
1445#define VEXTU_X_DO(name, size, left)                                \
1446    target_ulong glue(helper_, name)(target_ulong a, ppc_avr_t *b)  \
1447    {                                                               \
1448        int index;                                                  \
1449        if (left) {                                                 \
1450            index = ((15 - (a & 0xf) + 1) * 8) - size;              \
1451        } else {                                                    \
1452            index = (a & 0xf) * 8;                                  \
1453        }                                                           \
1454        return int128_getlo(int128_rshift(b->s128, index)) &        \
1455            MAKE_64BIT_MASK(0, size);                               \
1456    }
1457#endif
1458
1459VEXTU_X_DO(vextublx,  8, 1)
1460VEXTU_X_DO(vextuhlx, 16, 1)
1461VEXTU_X_DO(vextuwlx, 32, 1)
1462VEXTU_X_DO(vextubrx,  8, 0)
1463VEXTU_X_DO(vextuhrx, 16, 0)
1464VEXTU_X_DO(vextuwrx, 32, 0)
1465#undef VEXTU_X_DO
1466
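/*
 * vslv: shift each byte of a left by the count in the low 3 bits of the
 * corresponding byte of b, with the vacated bits filled from the next
 * byte to the right (zero for the last byte).
 */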
1467void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1468{
1469    int i;
1470    unsigned int shift, bytes, size;
1471
1472    size = ARRAY_SIZE(r->u8);
1473    for (i = 0; i < size; i++) {
1474        shift = b->VsrB(i) & 0x7;             /* extract shift value */
1475        bytes = (a->VsrB(i) << 8) +           /* extract adjacent bytes */
1476            (((i + 1) < size) ? a->VsrB(i + 1) : 0);
1477        r->VsrB(i) = (bytes << shift) >> 8;   /* shift and store result */
1478    }
1479}
1480
1481void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1482{
1483    int i;
1484    unsigned int shift, bytes;
1485
1486    /*
1487     * Use reverse order, as the destination and source register can be
1488     * the same.  The vector is modified in place, saving a temporary;
1489     * reverse order guarantees that a computed result is not fed back.
1490     */
1491    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1492        shift = b->VsrB(i) & 0x7;               /* extract shift value */
1493        bytes = ((i ? a->VsrB(i - 1) : 0) << 8) + a->VsrB(i);
1494                                                /* extract adjacent bytes */
1495        r->VsrB(i) = (bytes >> shift) & 0xFF;   /* shift and store result */
1496    }
1497}
1498
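/*
 * vsldoi: take 16 consecutive bytes from the concatenation a:b, starting
 * at byte offset 'shift' (0-15).
 */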
1499void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1500{
1501    int sh = shift & 0xf;
1502    int i;
1503    ppc_avr_t result;
1504
1505    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1506        int index = sh + i;
1507        if (index > 0xf) {
1508            result.VsrB(i) = b->VsrB(index - 0x10);
1509        } else {
1510            result.VsrB(i) = a->VsrB(index);
1511        }
1512    }
1513    *r = result;
1514}
1515
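/* vslo: shift a left by whole octets; the count is in bits 121:124 of b. */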
1516void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1517{
1518    int sh = (b->VsrB(0xf) >> 3) & 0xf;
1519
1520#if defined(HOST_WORDS_BIGENDIAN)
1521    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1522    memset(&r->u8[16 - sh], 0, sh);
1523#else
1524    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1525    memset(&r->u8[0], 0, sh);
1526#endif
1527}
1528
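/*
 * VINSERT: insert into r, at byte offset 'index', the element of b that
 * ends at byte 7 (byte 7, halfword 3, word 1 or doubleword 0 of b).
 */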
1529#if defined(HOST_WORDS_BIGENDIAN)
1530#define VINSERT(suffix, element)                                            \
1531    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1532    {                                                                       \
1533        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])],           \
1534               sizeof(r->element[0]));                                      \
1535    }
1536#else
1537#define VINSERT(suffix, element)                                            \
1538    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1539    {                                                                       \
1540        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
1541        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
1542    }
1543#endif
1544VINSERT(b, u8)
1545VINSERT(h, u16)
1546VINSERT(w, u32)
1547VINSERT(d, u64)
1548#undef VINSERT
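/*
 * VEXTRACT: extract the element-sized field of b starting at byte offset
 * 'index' and place it right-justified in the first doubleword of r,
 * zeroing the rest of r.
 */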
1549#if defined(HOST_WORDS_BIGENDIAN)
1550#define VEXTRACT(suffix, element)                                            \
1551    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1552    {                                                                        \
1553        uint32_t es = sizeof(r->element[0]);                                 \
1554        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
1555        memset(&r->u8[8], 0, 8);                                             \
1556        memset(&r->u8[0], 0, 8 - es);                                        \
1557    }
1558#else
1559#define VEXTRACT(suffix, element)                                            \
1560    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1561    {                                                                        \
1562        uint32_t es = sizeof(r->element[0]);                                 \
1563        uint32_t s = (16 - index) - es;                                      \
1564        memmove(&r->u8[8], &b->u8[s], es);                                   \
1565        memset(&r->u8[0], 0, 8);                                             \
1566        memset(&r->u8[8 + es], 0, 8 - es);                                   \
1567    }
1568#endif
1569VEXTRACT(ub, u8)
1570VEXTRACT(uh, u16)
1571VEXTRACT(uw, u32)
1572VEXTRACT(d, u64)
1573#undef VEXTRACT
1574
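/*
 * xxextractuw: extract the word starting at byte offset 'index' of xb
 * (wrapping modulo 16) into word element 1 of xt; the other words are zeroed.
 */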
1575void helper_xxextractuw(CPUPPCState *env, ppc_vsr_t *xt,
1576                        ppc_vsr_t *xb, uint32_t index)
1577{
1578    ppc_vsr_t t = { };
1579    size_t es = sizeof(uint32_t);
1580    uint32_t ext_index;
1581    int i;
1582
1583    ext_index = index;
1584    for (i = 0; i < es; i++, ext_index++) {
1585        t.VsrB(8 - es + i) = xb->VsrB(ext_index % 16);
1586    }
1587
1588    *xt = t;
1589}
1590
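/*
 * xxinsertw: insert word element 1 of xb into xt at byte offset 'index';
 * bytes that would fall past the end of the vector are not written.
 */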
1591void helper_xxinsertw(CPUPPCState *env, ppc_vsr_t *xt,
1592                      ppc_vsr_t *xb, uint32_t index)
1593{
1594    ppc_vsr_t t = *xt;
1595    size_t es = sizeof(uint32_t);
1596    int ins_index, i = 0;
1597
1598    ins_index = index;
1599    for (i = 0; i < es && ins_index < 16; i++, ins_index++) {
1600        t.VsrB(ins_index) = xb->VsrB(8 - es + i);
1601    }
1602
1603    *xt = t;
1604}
1605
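/* VEXT_SIGNED: sign-extend the low-order source-sized part of each element. */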
1606#define VEXT_SIGNED(name, element, cast)                            \
1607void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
1608{                                                                   \
1609    int i;                                                          \
1610    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1611        r->element[i] = (cast)b->element[i];                        \
1612    }                                                               \
1613}
1614VEXT_SIGNED(vextsb2w, s32, int8_t)
1615VEXT_SIGNED(vextsb2d, s64, int8_t)
1616VEXT_SIGNED(vextsh2w, s32, int16_t)
1617VEXT_SIGNED(vextsh2d, s64, int16_t)
1618VEXT_SIGNED(vextsw2d, s64, int32_t)
1619#undef VEXT_SIGNED
1620
1621#define VNEG(name, element)                                         \
1622void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
1623{                                                                   \
1624    int i;                                                          \
1625    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1626        r->element[i] = -b->element[i];                             \
1627    }                                                               \
1628}
1629VNEG(vnegw, s32)
1630VNEG(vnegd, s64)
1631#undef VNEG
1632
1633void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1634{
1635    int sh = (b->VsrB(0xf) >> 3) & 0xf;
1636
1637#if defined(HOST_WORDS_BIGENDIAN)
1638    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1639    memset(&r->u8[0], 0, sh);
1640#else
1641    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1642    memset(&r->u8[16 - sh], 0, sh);
1643#endif
1644}
1645
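/* vsubcuw: per-word carry-out of a - b, i.e. 1 when there is no borrow (a >= b). */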
1646void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1647{
1648    int i;
1649
1650    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1651        r->u32[i] = a->u32[i] >= b->u32[i];
1652    }
1653}
1654
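/*
 * vsumsws: add all signed word elements of a to word element 3 of b and
 * place the saturated sum in word element 3 of the result; the other
 * words are zeroed.
 */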
1655void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1656{
1657    int64_t t;
1658    int i, upper;
1659    ppc_avr_t result;
1660    int sat = 0;
1661
1662    upper = ARRAY_SIZE(r->s32) - 1;
1663    t = (int64_t)b->VsrSW(upper);
1664    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1665        t += a->VsrSW(i);
1666        result.VsrSW(i) = 0;
1667    }
1668    result.VsrSW(upper) = cvtsdsw(t, &sat);
1669    *r = result;
1670
1671    if (sat) {
1672        set_vscr_sat(env);
1673    }
1674}
1675
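/*
 * vsum2sws: saturating sum of word elements 0-1 of a plus word 1 of b into
 * word 1 of the result, and elements 2-3 of a plus word 3 of b into word 3;
 * words 0 and 2 are zeroed.
 */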
1676void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1677{
1678    int i, j, upper;
1679    ppc_avr_t result;
1680    int sat = 0;
1681
1682    upper = 1;
1683    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
1684        int64_t t = (int64_t)b->VsrSW(upper + i * 2);
1685
1686        result.VsrD(i) = 0;
1687        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
1688            t += a->VsrSW(2 * i + j);
1689        }
1690        result.VsrSW(upper + i * 2) = cvtsdsw(t, &sat);
1691    }
1692
1693    *r = result;
1694    if (sat) {
1695        set_vscr_sat(env);
1696    }
1697}
1698
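/*
 * The vsum4* helpers add the signed bytes (vsum4sbs), signed halfwords
 * (vsum4shs) or unsigned bytes (vsum4ubs) contained in each word of a to
 * the corresponding word element of b, saturating the 32-bit result.
 */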
1699void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1700{
1701    int i, j;
1702    int sat = 0;
1703
1704    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1705        int64_t t = (int64_t)b->s32[i];
1706
1707        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
1708            t += a->s8[4 * i + j];
1709        }
1710        r->s32[i] = cvtsdsw(t, &sat);
1711    }
1712
1713    if (sat) {
1714        set_vscr_sat(env);
1715    }
1716}
1717
1718void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1719{
1720    int sat = 0;
1721    int i;
1722
1723    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
1724        int64_t t = (int64_t)b->s32[i];
1725
1726        t += a->s16[2 * i] + a->s16[2 * i + 1];
1727        r->s32[i] = cvtsdsw(t, &sat);
1728    }
1729
1730    if (sat) {
1731        set_vscr_sat(env);
1732    }
1733}
1734
1735void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1736{
1737    int i, j;
1738    int sat = 0;
1739
1740    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
1741        uint64_t t = (uint64_t)b->u32[i];
1742
1743        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
1744            t += a->u8[4 * i + j];
1745        }
1746        r->u32[i] = cvtuduw(t, &sat);
1747    }
1748
1749    if (sat) {
1750        set_vscr_sat(env);
1751    }
1752}
1753
1754#if defined(HOST_WORDS_BIGENDIAN)
1755#define UPKHI 1
1756#define UPKLO 0
1757#else
1758#define UPKHI 0
1759#define UPKLO 1
1760#endif
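/*
 * VUPKPX: unpack 1:5:5:5 pixels from one half of b; the 1-bit alpha
 * expands to 0x00 or 0xff and each 5-bit channel is zero-extended into
 * its own byte of the 32-bit result element.
 */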
1761#define VUPKPX(suffix, hi)                                              \
1762    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
1763    {                                                                   \
1764        int i;                                                          \
1765        ppc_avr_t result;                                               \
1766                                                                        \
1767        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
1768            uint16_t e = b->u16[hi ? i : i + 4];                        \
1769            uint8_t a = (e >> 15) ? 0xff : 0;                           \
1770            uint8_t r = (e >> 10) & 0x1f;                               \
1771            uint8_t g = (e >> 5) & 0x1f;                                \
1772            uint8_t b = e & 0x1f;                                       \
1773                                                                        \
1774            result.u32[i] = (a << 24) | (r << 16) | (g << 8) | b;       \
1775        }                                                               \
1776        *r = result;                                                    \
1777    }
1778VUPKPX(lpx, UPKLO)
1779VUPKPX(hpx, UPKHI)
1780#undef VUPKPX
1781
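/*
 * VUPK: unpack one half of the packed elements of b, sign-extending each
 * to twice its width; UPKHI/UPKLO select the host-order indices that
 * correspond to the high or low half of the vector.
 */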
1782#define VUPK(suffix, unpacked, packee, hi)                              \
1783    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
1784    {                                                                   \
1785        int i;                                                          \
1786        ppc_avr_t result;                                               \
1787                                                                        \
1788        if (hi) {                                                       \
1789            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
1790                result.unpacked[i] = b->packee[i];                      \
1791            }                                                           \
1792        } else {                                                        \
1793            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
1794                 i++) {                                                 \
1795                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
1796            }                                                           \
1797        }                                                               \
1798        *r = result;                                                    \
1799    }
1800VUPK(hsb, s16, s8, UPKHI)
1801VUPK(hsh, s32, s16, UPKHI)
1802VUPK(hsw, s64, s32, UPKHI)
1803VUPK(lsb, s16, s8, UPKLO)
1804VUPK(lsh, s32, s16, UPKLO)
1805VUPK(lsw, s64, s32, UPKLO)
1806#undef VUPK
1807#undef UPKHI
1808#undef UPKLO
1809
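/* VGENERIC_DO: apply a per-element unary operation (clz/ctz/popcnt below). */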
1810#define VGENERIC_DO(name, element)                                      \
1811    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
1812    {                                                                   \
1813        int i;                                                          \
1814                                                                        \
1815        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1816            r->element[i] = name(b->element[i]);                        \
1817        }                                                               \
1818    }
1819
1820#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
1821#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
1822
1823VGENERIC_DO(clzb, u8)
1824VGENERIC_DO(clzh, u16)
1825
1826#undef clzb
1827#undef clzh
1828
1829#define ctzb(v) ((v) ? ctz32(v) : 8)
1830#define ctzh(v) ((v) ? ctz32(v) : 16)
1831#define ctzw(v) ctz32((v))
1832#define ctzd(v) ctz64((v))
1833
1834VGENERIC_DO(ctzb, u8)
1835VGENERIC_DO(ctzh, u16)
1836VGENERIC_DO(ctzw, u32)
1837VGENERIC_DO(ctzd, u64)
1838
1839#undef ctzb
1840#undef ctzh
1841#undef ctzw
1842#undef ctzd
1843
1844#define popcntb(v) ctpop8(v)
1845#define popcnth(v) ctpop16(v)
1846#define popcntw(v) ctpop32(v)
1847#define popcntd(v) ctpop64(v)
1848
1849VGENERIC_DO(popcntb, u8)
1850VGENERIC_DO(popcnth, u16)
1851VGENERIC_DO(popcntw, u32)
1852VGENERIC_DO(popcntd, u64)
1853
1854#undef popcntb
1855#undef popcnth
1856#undef popcntw
1857#undef popcntd
1858
1859#undef VGENERIC_DO
1860
1861#if defined(HOST_WORDS_BIGENDIAN)
1862#define QW_ONE { .u64 = { 0, 1 } }
1863#else
1864#define QW_ONE { .u64 = { 1, 0 } }
1865#endif
1866
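/*
 * Fallback 128-bit quadword arithmetic built from the two 64-bit halves,
 * for hosts whose compiler provides no __int128 type.
 */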
1867#ifndef CONFIG_INT128
1868
1869static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
1870{
1871    t->u64[0] = ~a.u64[0];
1872    t->u64[1] = ~a.u64[1];
1873}
1874
1875static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
1876{
1877    if (a.VsrD(0) < b.VsrD(0)) {
1878        return -1;
1879    } else if (a.VsrD(0) > b.VsrD(0)) {
1880        return 1;
1881    } else if (a.VsrD(1) < b.VsrD(1)) {
1882        return -1;
1883    } else if (a.VsrD(1) > b.VsrD(1)) {
1884        return 1;
1885    } else {
1886        return 0;
1887    }
1888}
1889
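/*
 * 128-bit addition: the carry out of the low doublewords is
 * (~a.VsrD(1) < b.VsrD(1)), i.e. the 64-bit sum wraps exactly when b's low
 * half exceeds the bitwise complement of a's low half.
 */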
1890static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1891{
1892    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1893    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1894                     (~a.VsrD(1) < b.VsrD(1));
1895}
1896
1897static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
1898{
1899    ppc_avr_t not_a;
1900    t->VsrD(1) = a.VsrD(1) + b.VsrD(1);
1901    t->VsrD(0) = a.VsrD(0) + b.VsrD(0) +
1902                     (~a.VsrD(1) < b.VsrD(1));
1903    avr_qw_not(&not_a, a);
1904    return avr_qw_cmpu(not_a, b) < 0;
1905}
1906
1907#endif
1908
1909void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1910{
1911#ifdef CONFIG_INT128
1912    r->u128 = a->u128 + b->u128;
1913#else
1914    avr_qw_add(r, *a, *b);
1915#endif
1916}
1917
1918void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1919{
1920#ifdef CONFIG_INT128
1921    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
1922#else
1923
1924    if (c->VsrD(1) & 1) {
1925        ppc_avr_t tmp;
1926
1927        tmp.VsrD(0) = 0;
1928        tmp.VsrD(1) = c->VsrD(1) & 1;
1929        avr_qw_add(&tmp, *a, tmp);
1930        avr_qw_add(r, tmp, *b);
1931    } else {
1932        avr_qw_add(r, *a, *b);
1933    }
1934#endif
1935}
1936
1937void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1938{
1939#ifdef CONFIG_INT128
1940    r->u128 = (~a->u128 < b->u128);
1941#else
1942    ppc_avr_t not_a;
1943
1944    avr_qw_not(&not_a, *a);
1945
1946    r->VsrD(0) = 0;
1947    r->VsrD(1) = (avr_qw_cmpu(not_a, *b) < 0);
1948#endif
1949}
1950
1951void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1952{
1953#ifdef CONFIG_INT128
1954    int carry_out = (~a->u128 < b->u128);
1955    if (!carry_out && (c->u128 & 1)) {
1956        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
1957                    ((a->u128 != 0) || (b->u128 != 0));
1958    }
1959    r->u128 = carry_out;
1960#else
1961
1962    int carry_in = c->VsrD(1) & 1;
1963    int carry_out = 0;
1964    ppc_avr_t tmp;
1965
1966    carry_out = avr_qw_addc(&tmp, *a, *b);
1967
1968    if (!carry_out && carry_in) {
1969        ppc_avr_t one = QW_ONE;
1970        carry_out = avr_qw_addc(&tmp, tmp, one);
1971    }
1972    r->VsrD(0) = 0;
1973    r->VsrD(1) = carry_out;
1974#endif
1975}
1976
1977void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1978{
1979#ifdef CONFIG_INT128
1980    r->u128 = a->u128 - b->u128;
1981#else
1982    ppc_avr_t tmp;
1983    ppc_avr_t one = QW_ONE;
1984
1985    avr_qw_not(&tmp, *b);
1986    avr_qw_add(&tmp, *a, tmp);
1987    avr_qw_add(r, tmp, one);
1988#endif
1989}
1990
1991void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
1992{
1993#ifdef CONFIG_INT128
1994    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
1995#else
1996    ppc_avr_t tmp, sum;
1997
1998    avr_qw_not(&tmp, *b);
1999    avr_qw_add(&sum, *a, tmp);
2000
2001    tmp.VsrD(0) = 0;
2002    tmp.VsrD(1) = c->VsrD(1) & 1;
2003    avr_qw_add(r, sum, tmp);
2004#endif
2005}
2006
2007void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2008{
2009#ifdef CONFIG_INT128
2010    r->u128 = (~a->u128 < ~b->u128) ||
2011                 (a->u128 + ~b->u128 == (__uint128_t)-1);
2012#else
2013    int carry = (avr_qw_cmpu(*a, *b) > 0);
2014    if (!carry) {
2015        ppc_avr_t tmp;
2016        avr_qw_not(&tmp, *b);
2017        avr_qw_add(&tmp, *a, tmp);
2018        carry = ((tmp.VsrSD(0) == -1ull) && (tmp.VsrSD(1) == -1ull));
2019    }
2020    r->VsrD(0) = 0;
2021    r->VsrD(1) = carry;
2022#endif
2023}
2024
2025void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2026{
2027#ifdef CONFIG_INT128
2028    r->u128 =
2029        (~a->u128 < ~b->u128) ||
2030        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2031#else
2032    int carry_in = c->VsrD(1) & 1;
2033    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2034    if (!carry_out && carry_in) {
2035        ppc_avr_t tmp;
2036        avr_qw_not(&tmp, *b);
2037        avr_qw_add(&tmp, *a, tmp);
2038        carry_out = ((tmp.VsrD(0) == -1ull) && (tmp.VsrD(1) == -1ull));
2039    }
2040
2041    r->VsrD(0) = 0;
2042    r->VsrD(1) = carry_out;
2043#endif
2044}
2045
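/*
 * Packed BCD values hold 31 4-bit digits followed by a 4-bit sign code in
 * the low nibble of the rightmost byte.  BCD_DIG_BYTE(n) maps digit n
 * (0 = sign nibble, 1 = least significant digit) to the byte holding it.
 */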
2046#define BCD_PLUS_PREF_1 0xC
2047#define BCD_PLUS_PREF_2 0xF
2048#define BCD_PLUS_ALT_1  0xA
2049#define BCD_NEG_PREF    0xD
2050#define BCD_NEG_ALT     0xB
2051#define BCD_PLUS_ALT_2  0xE
2052#define NATIONAL_PLUS   0x2B
2053#define NATIONAL_NEG    0x2D
2054
2055#define BCD_DIG_BYTE(n) (15 - ((n) / 2))
2056
2057static int bcd_get_sgn(ppc_avr_t *bcd)
2058{
2059    switch (bcd->VsrB(BCD_DIG_BYTE(0)) & 0xF) {
2060    case BCD_PLUS_PREF_1:
2061    case BCD_PLUS_PREF_2:
2062    case BCD_PLUS_ALT_1:
2063    case BCD_PLUS_ALT_2:
2064    {
2065        return 1;
2066    }
2067
2068    case BCD_NEG_PREF:
2069    case BCD_NEG_ALT:
2070    {
2071        return -1;
2072    }
2073
2074    default:
2075    {
2076        return 0;
2077    }
2078    }
2079}
2080
2081static int bcd_preferred_sgn(int sgn, int ps)
2082{
2083    if (sgn >= 0) {
2084        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2085    } else {
2086        return BCD_NEG_PREF;
2087    }
2088}
2089
2090static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2091{
2092    uint8_t result;
2093    if (n & 1) {
2094        result = bcd->VsrB(BCD_DIG_BYTE(n)) >> 4;
2095    } else {
2096        result = bcd->VsrB(BCD_DIG_BYTE(n)) & 0xF;
2097    }
2098
2099    if (unlikely(result > 9)) {
2100        *invalid = true;
2101    }
2102    return result;
2103}
2104
2105static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2106{
2107    if (n & 1) {
2108        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0x0F;
2109        bcd->VsrB(BCD_DIG_BYTE(n)) |= (digit << 4);
2110    } else {
2111        bcd->VsrB(BCD_DIG_BYTE(n)) &= 0xF0;
2112        bcd->VsrB(BCD_DIG_BYTE(n)) |= digit;
2113    }
2114}
2115
2116static bool bcd_is_valid(ppc_avr_t *bcd)
2117{
2118    int i;
2119    int invalid = 0;
2120
2121    if (bcd_get_sgn(bcd) == 0) {
2122        return false;
2123    }
2124
2125    for (i = 1; i < 32; i++) {
2126        bcd_get_digit(bcd, i, &invalid);
2127        if (unlikely(invalid)) {
2128            return false;
2129        }
2130    }
2131    return true;
2132}
2133
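/*
 * Return the CR pattern for a BCD value: EQ when every digit is zero,
 * otherwise GT or LT according to the sign nibble.
 */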
2134static int bcd_cmp_zero(ppc_avr_t *bcd)
2135{
2136    if (bcd->VsrD(0) == 0 && (bcd->VsrD(1) >> 4) == 0) {
2137        return CRF_EQ;
2138    } else {
2139        return (bcd_get_sgn(bcd) == 1) ? CRF_GT : CRF_LT;
2140    }
2141}
2142
2143static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2144{
2145    return reg->VsrH(7 - n);
2146}
2147
2148static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2149{
2150    reg->VsrH(7 - n) = val;
2151}
2152
2153static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2154{
2155    int i;
2156    int invalid = 0;
2157    for (i = 31; i > 0; i--) {
2158        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2159        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2160        if (unlikely(invalid)) {
2161            return 0; /* doesn't matter */
2162        } else if (dig_a > dig_b) {
2163            return 1;
2164        } else if (dig_a < dig_b) {
2165            return -1;
2166        }
2167    }
2168
2169    return 0;
2170}
2171
2172static void bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2173                       int *overflow)
2174{
2175    int carry = 0;
2176    int i;
2177    for (i = 1; i <= 31; i++) {
2178        uint8_t digit = bcd_get_digit(a, i, invalid) +
2179                        bcd_get_digit(b, i, invalid) + carry;
2180        if (digit > 9) {
2181            carry = 1;
2182            digit -= 10;
2183        } else {
2184            carry = 0;
2185        }
2186
2187        bcd_put_digit(t, digit, i);
2188    }
2189
2190    *overflow = carry;
2191}
2192
2193static void bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2194                       int *overflow)
2195{
2196    int carry = 0;
2197    int i;
2198
2199    for (i = 1; i <= 31; i++) {
2200        uint8_t digit = bcd_get_digit(a, i, invalid) -
2201                        bcd_get_digit(b, i, invalid) + carry;
2202        if (digit & 0x80) {
2203            carry = -1;
2204            digit += 10;
2205        } else {
2206            carry = 0;
2207        }
2208
2209        bcd_put_digit(t, digit, i);
2210    }
2211
2212    *overflow = carry;
2213}
2214
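/*
 * bcdadd: signed BCD addition.  Operands with equal signs add their
 * magnitudes; with opposite signs the smaller magnitude is subtracted
 * from the larger and the result takes the larger operand's sign.  The
 * returned CR field reports LT/GT/EQ, with SO on overflow or on an
 * invalid operand.
 */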
2215uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2216{
2218    int sgna = bcd_get_sgn(a);
2219    int sgnb = bcd_get_sgn(b);
2220    int invalid = (sgna == 0) || (sgnb == 0);
2221    int overflow = 0;
2222    uint32_t cr = 0;
2223    ppc_avr_t result = { .u64 = { 0, 0 } };
2224
2225    if (!invalid) {
2226        if (sgna == sgnb) {
2227            result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2228            bcd_add_mag(&result, a, b, &invalid, &overflow);
2229            cr = bcd_cmp_zero(&result);
2230        } else {
2231            int magnitude = bcd_cmp_mag(a, b);
2232            if (magnitude > 0) {
2233                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgna, ps);
2234                bcd_sub_mag(&result, a, b, &invalid, &overflow);
2235                cr = (sgna > 0) ? CRF_GT : CRF_LT;
2236            } else if (magnitude < 0) {
2237                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(sgnb, ps);
2238                bcd_sub_mag(&result, b, a, &invalid, &overflow);
2239                cr = (sgnb > 0) ? CRF_GT : CRF_LT;
2240            } else {
2241                result.VsrB(BCD_DIG_BYTE(0)) = bcd_preferred_sgn(0, ps);
2242                cr = CRF_EQ;
2243            }
2244        }
2245    }
2246
2247    if (unlikely(invalid)) {
2248        result.VsrD(0) = result.VsrD(1) = -1;
2249        cr = CRF_SO;
2250    } else if (overflow) {
2251        cr |= CRF_SO;
2252    }
2253
2254    *r = result;
2255
2256    return cr;
2257}
2258
2259uint32_t helper_bcdsub(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2260{
2261    ppc_avr_t bcopy = *b;
2262    int sgnb = bcd_get_sgn(b);
2263    if (sgnb < 0) {
2264        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2265    } else if (sgnb > 0) {
2266        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2267    }
2268    /* else invalid ... defer to bcdadd code for proper handling */
2269
2270    return helper_bcdadd(r, a, &bcopy, ps);
2271}
2272
2273uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2274{
2275    int i;
2276    int cr = 0;
2277    uint16_t national = 0;
2278    uint16_t sgnb = get_national_digit(b, 0);
2279    ppc_avr_t ret = { .u64 = { 0, 0 } };
2280    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2281
2282    for (i = 1; i < 8; i++) {
2283        national = get_national_digit(b, i);
2284        if (unlikely(national < 0x30 || national > 0x39)) {
2285            invalid = 1;
2286            break;
2287        }
2288
2289        bcd_put_digit(&ret, national & 0xf, i);
2290    }
2291
2292    if (sgnb == NATIONAL_PLUS) {
2293        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2294    } else {
2295        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2296    }
2297
2298    cr = bcd_cmp_zero(&ret);
2299
2300    if (unlikely(invalid)) {
2301        cr = CRF_SO;
2302    }
2303
2304    *r = ret;
2305
2306    return cr;
2307}
2308
2309uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2310{
2311    int i;
2312    int cr = 0;
2313    int sgnb = bcd_get_sgn(b);
2314    int invalid = (sgnb == 0);
2315    ppc_avr_t ret = { .u64 = { 0, 0 } };
2316
2317    int ox_flag = (b->VsrD(0) != 0) || ((b->VsrD(1) >> 32) != 0);
2318
2319    for (i = 1; i < 8; i++) {
2320        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2321
2322        if (unlikely(invalid)) {
2323            break;
2324        }
2325    }
2326    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2327
2328    cr = bcd_cmp_zero(b);
2329
2330    if (ox_flag) {
2331        cr |= CRF_SO;
2332    }
2333
2334    if (unlikely(invalid)) {
2335        cr = CRF_SO;
2336    }
2337
2338    *r = ret;
2339
2340    return cr;
2341}
2342
2343uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2344{
2345    int i;
2346    int cr = 0;
2347    int invalid = 0;
2348    int zone_digit = 0;
2349    int zone_lead = ps ? 0xF : 0x3;
2350    int digit = 0;
2351    ppc_avr_t ret = { .u64 = { 0, 0 } };
2352    int sgnb = b->VsrB(BCD_DIG_BYTE(0)) >> 4;
2353
2354    if (unlikely((sgnb < 0xA) && ps)) {
2355        invalid = 1;
2356    }
2357
2358    for (i = 0; i < 16; i++) {
2359        zone_digit = i ? b->VsrB(BCD_DIG_BYTE(i * 2)) >> 4 : zone_lead;
2360        digit = b->VsrB(BCD_DIG_BYTE(i * 2)) & 0xF;
2361        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2362            invalid = 1;
2363            break;
2364        }
2365
2366        bcd_put_digit(&ret, digit, i + 1);
2367    }
2368
2369    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2370            (!ps && (sgnb & 0x4))) {
2371        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2372    } else {
2373        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2374    }
2375
2376    cr = bcd_cmp_zero(&ret);
2377
2378    if (unlikely(invalid)) {
2379        cr = CRF_SO;
2380    }
2381
2382    *r = ret;
2383
2384    return cr;
2385}
2386
2387uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2388{
2389    int i;
2390    int cr = 0;
2391    uint8_t digit = 0;
2392    int sgnb = bcd_get_sgn(b);
2393    int zone_lead = (ps) ? 0xF0 : 0x30;
2394    int invalid = (sgnb == 0);
2395    ppc_avr_t ret = { .u64 = { 0, 0 } };
2396
2397    int ox_flag = ((b->VsrD(0) >> 4) != 0);
2398
2399    for (i = 0; i < 16; i++) {
2400        digit = bcd_get_digit(b, i + 1, &invalid);
2401
2402        if (unlikely(invalid)) {
2403            break;
2404        }
2405
2406        ret.VsrB(BCD_DIG_BYTE(i * 2)) = zone_lead + digit;
2407    }
2408
2409    if (ps) {
2410        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2411    } else {
2412        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2413    }
2414
2415    cr = bcd_cmp_zero(b);
2416
2417    if (ox_flag) {
2418        cr |= CRF_SO;
2419    }
2420
2421    if (unlikely(invalid)) {
2422        cr = CRF_SO;
2423    }
2424
2425    *r = ret;
2426
2427    return cr;
2428}
2429
2430uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2431{
2432    int i;
2433    int cr = 0;
2434    uint64_t lo_value;
2435    uint64_t hi_value;
2436    ppc_avr_t ret = { .u64 = { 0, 0 } };
2437
2438    if (b->VsrSD(0) < 0) {
2439        lo_value = -b->VsrSD(1);
2440        hi_value = ~b->VsrD(0) + !lo_value;
2441        bcd_put_digit(&ret, 0xD, 0);
2442    } else {
2443        lo_value = b->VsrD(1);
2444        hi_value = b->VsrD(0);
2445        bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
2446    }
2447
2448    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
2449            lo_value > 9999999999999999ULL) {
2450        cr = CRF_SO;
2451    }
2452
2453    for (i = 1; i < 16; hi_value /= 10, i++) {
2454        bcd_put_digit(&ret, hi_value % 10, i);
2455    }
2456
2457    for (; i < 32; lo_value /= 10, i++) {
2458        bcd_put_digit(&ret, lo_value % 10, i);
2459    }
2460
2461    cr |= bcd_cmp_zero(&ret);
2462
2463    *r = ret;
2464
2465    return cr;
2466}
2467
2468uint32_t helper_bcdctsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2469{
2470    uint8_t i;
2471    int cr;
2472    uint64_t carry;
2473    uint64_t unused;
2474    uint64_t lo_value;
2475    uint64_t hi_value = 0;
2476    int sgnb = bcd_get_sgn(b);
2477    int invalid = (sgnb == 0);
2478
2479    lo_value = bcd_get_digit(b, 31, &invalid);
2480    for (i = 30; i > 0; i--) {
2481        mulu64(&lo_value, &carry, lo_value, 10ULL);
2482        mulu64(&hi_value, &unused, hi_value, 10ULL);
2483        lo_value += bcd_get_digit(b, i, &invalid);
2484        hi_value += carry;
2485
2486        if (unlikely(invalid)) {
2487            break;
2488        }
2489    }
2490
2491    if (sgnb == -1) {
2492        r->VsrSD(1) = -lo_value;
2493        r->VsrSD(0) = ~hi_value + !r->VsrSD(1);
2494    } else {
2495        r->VsrSD(1) = lo_value;
2496        r->VsrSD(0) = hi_value;
2497    }
2498
2499    cr = bcd_cmp_zero(b);
2500
2501    if (unlikely(invalid)) {
2502        cr = CRF_SO;
2503    }
2504
2505    return cr;
2506}
2507
2508uint32_t helper_bcdcpsgn(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2509{
2510    int i;
2511    int invalid = 0;
2512
2513    if (bcd_get_sgn(a) == 0 || bcd_get_sgn(b) == 0) {
2514        return CRF_SO;
2515    }
2516
2517    *r = *a;
2518    bcd_put_digit(r, b->VsrB(BCD_DIG_BYTE(0)) & 0xF, 0);
2519
2520    for (i = 1; i < 32; i++) {
2521        bcd_get_digit(a, i, &invalid);
2522        bcd_get_digit(b, i, &invalid);
2523        if (unlikely(invalid)) {
2524            return CRF_SO;
2525        }
2526    }
2527
2528    return bcd_cmp_zero(r);
2529}
2530
2531uint32_t helper_bcdsetsgn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2532{
2533    int sgnb = bcd_get_sgn(b);
2534
2535    *r = *b;
2536    bcd_put_digit(r, bcd_preferred_sgn(sgnb, ps), 0);
2537
2538    if (bcd_is_valid(b) == false) {
2539        return CRF_SO;
2540    }
2541
2542    return bcd_cmp_zero(r);
2543}
2544
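/*
 * bcds: decimal shift.  Shift the digits of b left (positive) or right
 * (negative) by the signed count in byte element 7 of a, clamped to +/-31
 * digits; the sign nibble is regenerated and SO reports digits lost on a
 * left shift.
 */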
2545uint32_t helper_bcds(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2546{
2547    int cr;
2548    int i = a->VsrSB(7);
2549    bool ox_flag = false;
2550    int sgnb = bcd_get_sgn(b);
2551    ppc_avr_t ret = *b;
2552    ret.VsrD(1) &= ~0xf;
2553
2554    if (bcd_is_valid(b) == false) {
2555        return CRF_SO;
2556    }
2557
2558    if (unlikely(i > 31)) {
2559        i = 31;
2560    } else if (unlikely(i < -31)) {
2561        i = -31;
2562    }
2563
2564    if (i > 0) {
2565        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2566    } else {
2567        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2568    }
2569    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2570
2571    *r = ret;
2572
2573    cr = bcd_cmp_zero(r);
2574    if (ox_flag) {
2575        cr |= CRF_SO;
2576    }
2577
2578    return cr;
2579}
2580
2581uint32_t helper_bcdus(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2582{
2583    int cr;
2584    int i;
2585    int invalid = 0;
2586    bool ox_flag = false;
2587    ppc_avr_t ret = *b;
2588
2589    for (i = 0; i < 32; i++) {
2590        bcd_get_digit(b, i, &invalid);
2591
2592        if (unlikely(invalid)) {
2593            return CRF_SO;
2594        }
2595    }
2596
2597    i = a->VsrSB(7);
2598    if (i >= 32) {
2599        ox_flag = true;
2600        ret.VsrD(1) = ret.VsrD(0) = 0;
2601    } else if (i <= -32) {
2602        ret.VsrD(1) = ret.VsrD(0) = 0;
2603    } else if (i > 0) {
2604        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2605    } else {
2606        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2607    }
2608    *r = ret;
2609
2610    cr = bcd_cmp_zero(r);
2611    if (ox_flag) {
2612        cr |= CRF_SO;
2613    }
2614
2615    return cr;
2616}
2617
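/*
 * bcdsr: decimal shift and round.  Like bcds, but a right shift rounds to
 * nearest by adding 1 when the most significant digit shifted out is 5 or
 * greater.
 */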
2618uint32_t helper_bcdsr(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2619{
2620    int cr;
2621    int unused = 0;
2622    int invalid = 0;
2623    bool ox_flag = false;
2624    int sgnb = bcd_get_sgn(b);
2625    ppc_avr_t ret = *b;
2626    ret.VsrD(1) &= ~0xf;
2627
2628    int i = a->VsrSB(7);
2629    ppc_avr_t bcd_one;
2630
2631    bcd_one.VsrD(0) = 0;
2632    bcd_one.VsrD(1) = 0x10;
2633
2634    if (bcd_is_valid(b) == false) {
2635        return CRF_SO;
2636    }
2637
2638    if (unlikely(i > 31)) {
2639        i = 31;
2640    } else if (unlikely(i < -31)) {
2641        i = -31;
2642    }
2643
2644    if (i > 0) {
2645        ulshift(&ret.VsrD(1), &ret.VsrD(0), i * 4, &ox_flag);
2646    } else {
2647        urshift(&ret.VsrD(1), &ret.VsrD(0), -i * 4);
2648
2649        if (bcd_get_digit(&ret, 0, &invalid) >= 5) {
2650            bcd_add_mag(&ret, &ret, &bcd_one, &invalid, &unused);
2651        }
2652    }
2653    bcd_put_digit(&ret, bcd_preferred_sgn(sgnb, ps), 0);
2654
2655    cr = bcd_cmp_zero(&ret);
2656    if (ox_flag) {
2657        cr |= CRF_SO;
2658    }
2659    *r = ret;
2660
2661    return cr;
2662}
2663
2664uint32_t helper_bcdtrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2665{
2666    uint64_t mask;
2667    uint32_t ox_flag = 0;
2668    int i = a->VsrSH(3) + 1;
2669    ppc_avr_t ret = *b;
2670
2671    if (bcd_is_valid(b) == false) {
2672        return CRF_SO;
2673    }
2674
2675    if (i > 16 && i < 32) {
2676        mask = (uint64_t)-1 >> (128 - i * 4);
2677        if (ret.VsrD(0) & ~mask) {
2678            ox_flag = CRF_SO;
2679        }
2680
2681        ret.VsrD(0) &= mask;
2682    } else if (i >= 0 && i <= 16) {
2683        mask = (uint64_t)-1 >> (64 - i * 4);
2684        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2685            ox_flag = CRF_SO;
2686        }
2687
2688        ret.VsrD(1) &= mask;
2689        ret.VsrD(0) = 0;
2690    }
2691    bcd_put_digit(&ret, bcd_preferred_sgn(bcd_get_sgn(b), ps), 0);
2692    *r = ret;
2693
2694    return bcd_cmp_zero(&ret) | ox_flag;
2695}
2696
2697uint32_t helper_bcdutrunc(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2698{
2699    int i;
2700    uint64_t mask;
2701    uint32_t ox_flag = 0;
2702    int invalid = 0;
2703    ppc_avr_t ret = *b;
2704
2705    for (i = 0; i < 32; i++) {
2706        bcd_get_digit(b, i, &invalid);
2707
2708        if (unlikely(invalid)) {
2709            return CRF_SO;
2710        }
2711    }
2712
2713    i = a->VsrSH(3);
2714    if (i > 16 && i < 33) {
2715        mask = (uint64_t)-1 >> (128 - i * 4);
2716        if (ret.VsrD(0) & ~mask) {
2717            ox_flag = CRF_SO;
2718        }
2719
2720        ret.VsrD(0) &= mask;
2721    } else if (i > 0 && i <= 16) {
2722        mask = (uint64_t)-1 >> (64 - i * 4);
2723        if (ret.VsrD(0) || (ret.VsrD(1) & ~mask)) {
2724            ox_flag = CRF_SO;
2725        }
2726
2727        ret.VsrD(1) &= mask;
2728        ret.VsrD(0) = 0;
2729    } else if (i == 0) {
2730        if (ret.VsrD(0) || ret.VsrD(1)) {
2731            ox_flag = CRF_SO;
2732        }
2733        ret.VsrD(0) = ret.VsrD(1) = 0;
2734    }
2735
2736    *r = ret;
2737    if (r->VsrD(0) == 0 && r->VsrD(1) == 0) {
2738        return ox_flag | CRF_EQ;
2739    }
2740
2741    return ox_flag | CRF_GT;
2742}
2743
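/* vsbox: apply the AES S-box to every byte (the SubBytes step). */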
2744void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2745{
2746    int i;
2747    VECTOR_FOR_INORDER_I(i, u8) {
2748        r->u8[i] = AES_sbox[a->u8[i]];
2749    }
2750}
2751
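/*
 * vcipher: one full AES encryption round - ShiftRows, SubBytes and
 * MixColumns via the combined Te lookup tables, then AddRoundKey with b.
 */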
2752void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2753{
2754    ppc_avr_t result;
2755    int i;
2756
2757    VECTOR_FOR_INORDER_I(i, u32) {
2758        result.VsrW(i) = b->VsrW(i) ^
2759            (AES_Te0[a->VsrB(AES_shifts[4 * i + 0])] ^
2760             AES_Te1[a->VsrB(AES_shifts[4 * i + 1])] ^
2761             AES_Te2[a->VsrB(AES_shifts[4 * i + 2])] ^
2762             AES_Te3[a->VsrB(AES_shifts[4 * i + 3])]);
2763    }
2764    *r = result;
2765}
2766
2767void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2768{
2769    ppc_avr_t result;
2770    int i;
2771
2772    VECTOR_FOR_INORDER_I(i, u8) {
2773        result.VsrB(i) = b->VsrB(i) ^ (AES_sbox[a->VsrB(AES_shifts[i])]);
2774    }
2775    *r = result;
2776}
2777
2778void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2779{
2780    /* This differs from what is written in ISA V2.07.  The RTL is */
2781    /* incorrect and will be fixed in V2.07B.                      */
2782    int i;
2783    ppc_avr_t tmp;
2784
2785    VECTOR_FOR_INORDER_I(i, u8) {
2786        tmp.VsrB(i) = b->VsrB(i) ^ AES_isbox[a->VsrB(AES_ishifts[i])];
2787    }
2788
2789    VECTOR_FOR_INORDER_I(i, u32) {
2790        r->VsrW(i) =
2791            AES_imc[tmp.VsrB(4 * i + 0)][0] ^
2792            AES_imc[tmp.VsrB(4 * i + 1)][1] ^
2793            AES_imc[tmp.VsrB(4 * i + 2)][2] ^
2794            AES_imc[tmp.VsrB(4 * i + 3)][3];
2795    }
2796}
2797
2798void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2799{
2800    ppc_avr_t result;
2801    int i;
2802
2803    VECTOR_FOR_INORDER_I(i, u8) {
2804        result.VsrB(i) = b->VsrB(i) ^ (AES_isbox[a->VsrB(AES_ishifts[i])]);
2805    }
2806    *r = result;
2807}
2808
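/*
 * vshasigmaw/vshasigmad: SHA-256/SHA-512 sigma functions.  Bit 4 of st_six
 * selects the "upper-case" Sigma variants, and each relevant bit of the
 * low nibble selects sigma1 instead of sigma0 for that element.
 */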
2809void helper_vshasigmaw(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
2810{
2811    int st = (st_six & 0x10) != 0;
2812    int six = st_six & 0xF;
2813    int i;
2814
2815    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2816        if (st == 0) {
2817            if ((six & (0x8 >> i)) == 0) {
2818                r->VsrW(i) = ror32(a->VsrW(i), 7) ^
2819                             ror32(a->VsrW(i), 18) ^
2820                             (a->VsrW(i) >> 3);
2821            } else { /* six.bit[i] == 1 */
2822                r->VsrW(i) = ror32(a->VsrW(i), 17) ^
2823                             ror32(a->VsrW(i), 19) ^
2824                             (a->VsrW(i) >> 10);
2825            }
2826        } else { /* st == 1 */
2827            if ((six & (0x8 >> i)) == 0) {
2828                r->VsrW(i) = ror32(a->VsrW(i), 2) ^
2829                             ror32(a->VsrW(i), 13) ^
2830                             ror32(a->VsrW(i), 22);
2831            } else { /* six.bit[i] == 1 */
2832                r->VsrW(i) = ror32(a->VsrW(i), 6) ^
2833                             ror32(a->VsrW(i), 11) ^
2834                             ror32(a->VsrW(i), 25);
2835            }
2836        }
2837    }
2838}
2839
2840void helper_vshasigmad(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
2841{
2842    int st = (st_six & 0x10) != 0;
2843    int six = st_six & 0xF;
2844    int i;
2845
2846    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2847        if (st == 0) {
2848            if ((six & (0x8 >> (2 * i))) == 0) {
2849                r->VsrD(i) = ror64(a->VsrD(i), 1) ^
2850                             ror64(a->VsrD(i), 8) ^
2851                             (a->VsrD(i) >> 7);
2852            } else { /* six.bit[2*i] == 1 */
2853                r->VsrD(i) = ror64(a->VsrD(i), 19) ^
2854                             ror64(a->VsrD(i), 61) ^
2855                             (a->VsrD(i) >> 6);
2856            }
2857        } else { /* st == 1 */
2858            if ((six & (0x8 >> (2 * i))) == 0) {
2859                r->VsrD(i) = ror64(a->VsrD(i), 28) ^
2860                             ror64(a->VsrD(i), 34) ^
2861                             ror64(a->VsrD(i), 39);
2862            } else { /* six.bit[2*i] == 1 */
2863                r->VsrD(i) = ror64(a->VsrD(i), 14) ^
2864                             ror64(a->VsrD(i), 18) ^
2865                             ror64(a->VsrD(i), 41);
2866            }
2867        }
2868    }
2869}
2870
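/*
 * vpermxor: for each byte, the two nibbles of c select one byte of a and
 * one byte of b, which are XORed to produce the result byte.
 */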
2871void helper_vpermxor(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2872{
2873    ppc_avr_t result;
2874    int i;
2875
2876    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
2877        int indexA = c->VsrB(i) >> 4;
2878        int indexB = c->VsrB(i) & 0xF;
2879
2880        result.VsrB(i) = a->VsrB(indexA) ^ b->VsrB(indexB);
2881    }
2882    *r = result;
2883}
2884
2885#undef VECTOR_FOR_INORDER_I
2886
2887/*****************************************************************************/
2888/* SPE extension helpers */
2889/* Use a table to make this quicker */
2890static const uint8_t hbrev[16] = {
2891    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
2892    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
2893};
2894
2895static inline uint8_t byte_reverse(uint8_t val)
2896{
2897    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
2898}
2899
2900static inline uint32_t word_reverse(uint32_t val)
2901{
2902    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
2903        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
2904}
2905
2906#define MASKBITS 16 /* Arbitrary value - to be fixed (implementation dependent) */
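/*
 * brinc: bit-reversed increment.  Within the MASKBITS-wide window selected
 * by arg2, the low bits of arg1 are incremented in bit-reversed order (as
 * used for FFT-style addressing); bits outside the mask are preserved.
 */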
2907target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
2908{
2909    uint32_t a, b, d, mask;
2910
2911    mask = UINT32_MAX >> (32 - MASKBITS);
2912    a = arg1 & mask;
2913    b = arg2 & mask;
2914    d = word_reverse(1 + word_reverse(a | ~b));
2915    return (arg1 & ~mask) | (d & b);
2916}
2917
2918uint32_t helper_cntlsw32(uint32_t val)
2919{
2920    if (val & 0x80000000) {
2921        return clz32(~val);
2922    } else {
2923        return clz32(val);
2924    }
2925}
2926
2927uint32_t helper_cntlzw32(uint32_t val)
2928{
2929    return clz32(val);
2930}
2931
2932/* 440 specific */
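/*
 * dlmzb: scan the 8-byte string formed by high:low for the leftmost zero
 * byte; the resulting byte count goes into the low bits of XER and, when
 * update_Rc is set, a status code into CR0.
 */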
2933target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
2934                          target_ulong low, uint32_t update_Rc)
2935{
2936    target_ulong mask;
2937    int i;
2938
2939    i = 1;
2940    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2941        if ((high & mask) == 0) {
2942            if (update_Rc) {
2943                env->crf[0] = 0x4;
2944            }
2945            goto done;
2946        }
2947        i++;
2948    }
2949    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
2950        if ((low & mask) == 0) {
2951            if (update_Rc) {
2952                env->crf[0] = 0x8;
2953            }
2954            goto done;
2955        }
2956        i++;
2957    }
2958    i = 8;
2959    if (update_Rc) {
2960        env->crf[0] = 0x2;
2961    }
2962 done:
2963    env->xer = (env->xer & ~0x7F) | i;
2964    if (update_Rc) {
2965        env->crf[0] |= xer_so;
2966    }
2967    return i;
2968}
2969