qemu/target-ppc/int_helper.c
/*
 *  PowerPC integer and vector emulation helpers for QEMU.
 *
 *  Copyright (c) 2003-2007 Jocelyn Mayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "exec/exec-all.h"
#include "qemu/host-utils.h"
#include "exec/helper-proto.h"
#include "crypto/aes.h"

#include "helper_regs.h"
/*****************************************************************************/
/* Fixed point operations helpers */

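/* Divide Word Extended (divwe[u]): the 32-bit dividend in ra is extended
 * with 32 zero bits and divided by the 32-bit divisor in rb.  Overflow is
 * signalled when the divisor is zero or the quotient does not fit in
 * 32 bits, in which case the architecture leaves the result undefined.
 */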
target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
                           uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    uint64_t dividend = (uint64_t)ra << 32;
    uint64_t divisor = (uint32_t)rb;

    if (unlikely(divisor == 0)) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt > UINT32_MAX;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

target_ulong helper_divwe(CPUPPCState *env, target_ulong ra, target_ulong rb,
                          uint32_t oe)
{
    int64_t rt = 0;
    int overflow = 0;

    int64_t dividend = (int64_t)ra << 32;
    int64_t divisor = (int64_t)((int32_t)rb);

    if (unlikely((divisor == 0) ||
                 ((divisor == -1ull) && (dividend == INT64_MIN)))) {
        overflow = 1;
    } else {
        rt = dividend / divisor;
        overflow = rt != (int32_t)rt;
    }

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return (target_ulong)rt;
}

#if defined(TARGET_PPC64)

uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
{
    uint64_t rt = 0;
    int overflow = 0;

    overflow = divu128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
{
    int64_t rt = 0;
    int64_t ra = (int64_t)rau;
    int64_t rb = (int64_t)rbu;
    int overflow = divs128(&rt, &ra, rb);

    if (unlikely(overflow)) {
        rt = 0; /* Undefined */
    }

    if (oe) {
        if (unlikely(overflow)) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
    }

    return rt;
}

#endif

target_ulong helper_cntlzw(target_ulong t)
{
    return clz32(t);
}

target_ulong helper_cnttzw(target_ulong t)
{
    return ctz32(t);
}

#if defined(TARGET_PPC64)
/* if x = 0xab, returns 0xabababababababab */
#define pattern(x) (((x) & 0xff) * (~(target_ulong)0 / 0xff))

/* subtract 1 from each byte, AND with the inverse of the original value,
 * and check whether the MSB of each byte is set; it is set only for a
 * zero byte:
 * i.e. ((0x00 - 0x01) & ~(0x00)) & 0x80
 *      (0xFF & 0xFF) & 0x80 = 0x80 (zero found)
 */
#define haszero(v) (((v) - pattern(0x01)) & ~(v) & pattern(0x80))

/* When you XOR the value with the pattern and there is a match, that byte
 * will be zero */
#define hasvalue(x, n)  (haszero((x) ^ pattern(n)))
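/* Worked example: hasvalue(0x1122334455667788, 0x55):
 * XORing with pattern(0x55) = 0x5555555555555555 gives
 * 0x44776611003322dd; the zero byte makes haszero() return a non-zero
 * mask, so the match is detected.
 */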

uint32_t helper_cmpeqb(target_ulong ra, target_ulong rb)
{
    return hasvalue(rb, ra) ? 1 << CRF_GT : 0;
}

#undef pattern
#undef haszero
#undef hasvalue

target_ulong helper_cntlzd(target_ulong t)
{
    return clz64(t);
}

target_ulong helper_cnttzd(target_ulong t)
{
    return ctz64(t);
}

/* Return an invalid random number.
 *
 * FIXME: Add an rng backend or other mechanism to get a cryptographically
 * suitable random number.
 */
target_ulong helper_darn32(void)
{
    return -1;
}

target_ulong helper_darn64(void)
{
    return -1;
}

#endif

#if defined(TARGET_PPC64)

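/* Bit Permute Doubleword (bpermd): each of the eight bytes of rs selects
 * one bit of rb (big-endian bit numbering, so index 0 is the MSB); the
 * selected bits are gathered into the low byte of the result.  Byte
 * values >= 64 select no bit and contribute 0.
 */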
uint64_t helper_bpermd(uint64_t rs, uint64_t rb)
{
    int i;
    uint64_t ra = 0;

    for (i = 0; i < 8; i++) {
        int index = (rs >> (i * 8)) & 0xFF;
        if (index < 64) {
            if (rb & (1ull << (63 - index))) {
                ra |= 1 << i;
            }
        }
    }
    return ra;
}

#endif

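/* Compare Bytes (cmpb): for each byte position, the result byte is set to
 * 0xff if the corresponding bytes of rs and rb are equal, 0x00 otherwise.
 */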
target_ulong helper_cmpb(target_ulong rs, target_ulong rb)
{
    target_ulong mask = 0xff;
    target_ulong ra = 0;
    int i;

    for (i = 0; i < sizeof(target_ulong); i++) {
        if ((rs & mask) == (rb & mask)) {
            ra |= mask;
        }
        mask <<= 8;
    }
    return ra;
}

/* shift right arithmetic helper */
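/* CA is set when the shifted value is negative and any 1-bits were shifted
 * out; shift amounts of 32-63 (64-127 for srad) replicate the sign bit
 * across the whole result.
 */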
target_ulong helper_sraw(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int32_t ret;

    if (likely(!(shift & 0x20))) {
        if (likely((uint32_t)shift != 0)) {
            shift &= 0x1f;
            ret = (int32_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1 << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int32_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int32_t)value >> 31;
        env->ca = (ret != 0);
    }
    return (target_long)ret;
}

#if defined(TARGET_PPC64)
target_ulong helper_srad(CPUPPCState *env, target_ulong value,
                         target_ulong shift)
{
    int64_t ret;

    if (likely(!(shift & 0x40))) {
        if (likely((uint64_t)shift != 0)) {
            shift &= 0x3f;
            ret = (int64_t)value >> shift;
            if (likely(ret >= 0 || (value & ((1ULL << shift) - 1)) == 0)) {
                env->ca = 0;
            } else {
                env->ca = 1;
            }
        } else {
            ret = (int64_t)value;
            env->ca = 0;
        }
    } else {
        ret = (int64_t)value >> 63;
        env->ca = (ret != 0);
    }
    return ret;
}
#endif

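/* The popcnt helpers use the classic parallel bit count: adjacent fields
 * are summed pairwise at 1-, 2- and 4-bit granularity.  popcntb stops as
 * soon as every byte holds its own population count; popcntw carries on
 * with 8- and 16-bit steps until each 32-bit word holds its count.
 */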
#if defined(TARGET_PPC64)
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x5555555555555555ULL) + ((val >>  1) &
                                           0x5555555555555555ULL);
    val = (val & 0x3333333333333333ULL) + ((val >>  2) &
                                           0x3333333333333333ULL);
    val = (val & 0x0f0f0f0f0f0f0f0fULL) + ((val >>  4) &
                                           0x0f0f0f0f0f0f0f0fULL);
    val = (val & 0x00ff00ff00ff00ffULL) + ((val >>  8) &
                                           0x00ff00ff00ff00ffULL);
    val = (val & 0x0000ffff0000ffffULL) + ((val >> 16) &
                                           0x0000ffff0000ffffULL);
    return val;
}

target_ulong helper_popcntd(target_ulong val)
{
    return ctpop64(val);
}
#else
target_ulong helper_popcntb(target_ulong val)
{
    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    return val;
}

target_ulong helper_popcntw(target_ulong val)
{
    val = (val & 0x55555555) + ((val >>  1) & 0x55555555);
    val = (val & 0x33333333) + ((val >>  2) & 0x33333333);
    val = (val & 0x0f0f0f0f) + ((val >>  4) & 0x0f0f0f0f);
    val = (val & 0x00ff00ff) + ((val >>  8) & 0x00ff00ff);
    val = (val & 0x0000ffff) + ((val >> 16) & 0x0000ffff);
    return val;
}
#endif

/*****************************************************************************/
/* PowerPC 601 specific instructions (POWER bridge) */
target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        return tmp / (int32_t)arg2;
    }
}

target_ulong helper_divo(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    uint64_t tmp = (uint64_t)arg1 << 32 | env->spr[SPR_MQ];

    if (((int32_t)tmp == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = tmp % arg2;
        tmp /= (int32_t)arg2;
        if ((int32_t)tmp != tmp) {
            env->so = env->ov = 1;
        } else {
            env->ov = 0;
        }
        return tmp;
    }
}

target_ulong helper_divs(CPUPPCState *env, target_ulong arg1,
                         target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

target_ulong helper_divso(CPUPPCState *env, target_ulong arg1,
                          target_ulong arg2)
{
    if (((int32_t)arg1 == INT32_MIN && (int32_t)arg2 == (int32_t)-1) ||
        (int32_t)arg2 == 0) {
        env->so = env->ov = 1;
        env->spr[SPR_MQ] = 0;
        return INT32_MIN;
    } else {
        env->ov = 0;
        env->spr[SPR_MQ] = (int32_t)arg1 % (int32_t)arg2;
        return (int32_t)arg1 / (int32_t)arg2;
    }
}

/*****************************************************************************/
/* 602 specific instructions */
/* mfrom is the craziest instruction ever seen, imho! */
/* Real implementation uses a ROM table. Do the same. */
/* Extremely decomposed:
 * return 256 * log10(10^(-arg / 256) + 1.0) + 0.5
 */
#if !defined(CONFIG_USER_ONLY)
target_ulong helper_602_mfrom(target_ulong arg)
{
    if (likely(arg < 602)) {
#include "mfrom_table.c"
        return mfrom_ROM_table[arg];
    } else {
        return 0;
    }
}
#endif

/*****************************************************************************/
/* Altivec extension helpers */
#if defined(HOST_WORDS_BIGENDIAN)
#define HI_IDX 0
#define LO_IDX 1
#define AVRB(i) u8[i]
#define AVRW(i) u32[i]
#else
#define HI_IDX 1
#define LO_IDX 0
#define AVRB(i) u8[15-(i)]
#define AVRW(i) u32[3-(i)]
#endif

#if defined(HOST_WORDS_BIGENDIAN)
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = 0; index < ARRAY_SIZE(r->element); index++)
#else
#define VECTOR_FOR_INORDER_I(index, element)                    \
    for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
#endif
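/* ppc_avr_t elements are stored in host byte order, so this macro visits
 * the elements of r in PowerPC (big-endian) element order regardless of
 * the host endianness.
 */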

/* Saturating arithmetic helpers.  */
#define SATCVT(from, to, from_type, to_type, min, max)          \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x < (from_type)min) {                               \
            r = min;                                            \
            *sat = 1;                                           \
        } else if (x > (from_type)max) {                        \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
#define SATCVTU(from, to, from_type, to_type, min, max)         \
    static inline to_type cvt##from##to(from_type x, int *sat)  \
    {                                                           \
        to_type r;                                              \
                                                                \
        if (x > (from_type)max) {                               \
            r = max;                                            \
            *sat = 1;                                           \
        } else {                                                \
            r = x;                                              \
        }                                                       \
        return r;                                               \
    }
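/* For instance, cvtsdsw() clamps a 64-bit signed value into
 * [INT32_MIN, INT32_MAX] and sets *sat if clamping occurred, while
 * cvtsduw() clamps the same input into [0, UINT32_MAX].
 */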
SATCVT(sh, sb, int16_t, int8_t, INT8_MIN, INT8_MAX)
SATCVT(sw, sh, int32_t, int16_t, INT16_MIN, INT16_MAX)
SATCVT(sd, sw, int64_t, int32_t, INT32_MIN, INT32_MAX)

SATCVTU(uh, ub, uint16_t, uint8_t, 0, UINT8_MAX)
SATCVTU(uw, uh, uint32_t, uint16_t, 0, UINT16_MAX)
SATCVTU(ud, uw, uint64_t, uint32_t, 0, UINT32_MAX)
SATCVT(sh, ub, int16_t, uint8_t, 0, UINT8_MAX)
SATCVT(sw, uh, int32_t, uint16_t, 0, UINT16_MAX)
SATCVT(sd, uw, int64_t, uint32_t, 0, UINT32_MAX)
#undef SATCVT
#undef SATCVTU

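/* lvsl/lvsr build the permute control vector used for unaligned vector
 * accesses: lvsl yields the byte indices sh, sh+1, ..., sh+15 and lvsr
 * yields 16-sh, ..., 31-sh, where sh is the low 4 bits of the address.
 */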
void helper_lvsl(ppc_avr_t *r, target_ulong sh)
{
    int i, j = (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_lvsr(ppc_avr_t *r, target_ulong sh)
{
    int i, j = 0x10 - (sh & 0xf);

    VECTOR_FOR_INORDER_I(i, u8) {
        r->u8[i] = j++;
    }
}

void helper_mtvscr(CPUPPCState *env, ppc_avr_t *r)
{
#if defined(HOST_WORDS_BIGENDIAN)
    env->vscr = r->u32[3];
#else
    env->vscr = r->u32[0];
#endif
    set_flush_to_zero(vscr_nj, &env->vec_status);
}

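/* vaddcuw computes the carry out of each 32-bit addition: the carry is 1
 * exactly when b > ~a, i.e. when a + b does not fit in 32 bits.
 */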
void helper_vaddcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        r->u32[i] = ~a->u32[i] < b->u32[i];
    }
}

/* vprtybw */
void helper_vprtybw(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
        uint64_t res = b->u32[i] ^ (b->u32[i] >> 16);
        res ^= res >> 8;
        r->u32[i] = res & 1;
    }
}

/* vprtybd */
void helper_vprtybd(ppc_avr_t *r, ppc_avr_t *b)
{
    int i;
    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
        uint64_t res = b->u64[i] ^ (b->u64[i] >> 32);
        res ^= res >> 16;
        res ^= res >> 8;
        r->u64[i] = res & 1;
    }
}

/* vprtybq */
void helper_vprtybq(ppc_avr_t *r, ppc_avr_t *b)
{
    uint64_t res = b->u64[0] ^ b->u64[1];
    res ^= res >> 32;
    res ^= res >> 16;
    res ^= res >> 8;
    r->u64[LO_IDX] = res & 1;
    r->u64[HI_IDX] = 0;
}

#define VARITH_DO(name, op, element)                                    \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            r->element[i] = a->element[i] op b->element[i];             \
        }                                                               \
    }
#define VARITH(suffix, element)                 \
    VARITH_DO(add##suffix, +, element)          \
    VARITH_DO(sub##suffix, -, element)
VARITH(ubm, u8)
VARITH(uhm, u16)
VARITH(uwm, u32)
VARITH(udm, u64)
VARITH_DO(muluwm, *, u32)
#undef VARITH_DO
#undef VARITH

#define VARITHFP(suffix, func)                                          \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b)                                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
        }                                                               \
    }
VARITHFP(addfp, float32_add)
VARITHFP(subfp, float32_sub)
VARITHFP(minfp, float32_min)
VARITHFP(maxfp, float32_max)
#undef VARITHFP

#define VARITHFPFMA(suffix, type)                                       \
    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
                          ppc_avr_t *b, ppc_avr_t *c)                   \
    {                                                                   \
        int i;                                                          \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
                                     type, &env->vec_status);           \
        }                                                               \
    }
VARITHFPFMA(maddfp, 0);
VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
#undef VARITHFPFMA

#define VARITHSAT_CASE(type, op, cvt, element)                          \
    {                                                                   \
        type result = (type)a->element[i] op (type)b->element[i];       \
        r->element[i] = cvt(result, &sat);                              \
    }

#define VARITHSAT_DO(name, op, optype, cvt, element)                    \
    void helper_v##name(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,   \
                        ppc_avr_t *b)                                   \
    {                                                                   \
        int sat = 0;                                                    \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            switch (sizeof(r->element[0])) {                            \
            case 1:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 2:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            case 4:                                                     \
                VARITHSAT_CASE(optype, op, cvt, element);               \
                break;                                                  \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
#define VARITHSAT_SIGNED(suffix, element, optype, cvt)          \
    VARITHSAT_DO(adds##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subs##suffix##s, -, optype, cvt, element)
#define VARITHSAT_UNSIGNED(suffix, element, optype, cvt)        \
    VARITHSAT_DO(addu##suffix##s, +, optype, cvt, element)      \
    VARITHSAT_DO(subu##suffix##s, -, optype, cvt, element)
VARITHSAT_SIGNED(b, s8, int16_t, cvtshsb)
VARITHSAT_SIGNED(h, s16, int32_t, cvtswsh)
VARITHSAT_SIGNED(w, s32, int64_t, cvtsdsw)
VARITHSAT_UNSIGNED(b, u8, uint16_t, cvtshub)
VARITHSAT_UNSIGNED(h, u16, uint32_t, cvtswuh)
VARITHSAT_UNSIGNED(w, u32, uint64_t, cvtsduw)
#undef VARITHSAT_CASE
#undef VARITHSAT_DO
#undef VARITHSAT_SIGNED
#undef VARITHSAT_UNSIGNED

#define VAVG_DO(name, element, etype)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            etype x = (etype)a->element[i] + (etype)b->element[i] + 1;  \
            r->element[i] = x >> 1;                                     \
        }                                                               \
    }

#define VAVG(type, signed_element, signed_type, unsigned_element,       \
             unsigned_type)                                             \
    VAVG_DO(avgs##type, signed_element, signed_type)                    \
    VAVG_DO(avgu##type, unsigned_element, unsigned_type)
VAVG(b, s8, int16_t, u8, uint16_t)
VAVG(h, s16, int32_t, u16, uint32_t)
VAVG(w, s32, int64_t, u32, uint64_t)
#undef VAVG_DO
#undef VAVG

#define VABSDU_DO(name, element)                                        \
void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        r->element[i] = (a->element[i] > b->element[i]) ?               \
            (a->element[i] - b->element[i]) :                           \
            (b->element[i] - a->element[i]);                            \
    }                                                                   \
}

/* VABSDU - Vector absolute difference unsigned
 *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VABSDU(type, element)                   \
    VABSDU_DO(absdu##type, element)
VABSDU(b, u8)
VABSDU(h, u16)
VABSDU(w, u32)
#undef VABSDU_DO
#undef VABSDU

#define VCF(suffix, cvt, element)                                       \
    void helper_vcf##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            float32 t = cvt(b->element[i], &env->vec_status);           \
            r->f[i] = float32_scalbn(t, -uim, &env->vec_status);        \
        }                                                               \
    }
VCF(ux, uint32_to_float32, u32)
VCF(sx, int32_to_float32, s32)
#undef VCF

#define VCMP_DO(suffix, compare, element, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint64_t ones = (uint64_t)-1;                                   \
        uint64_t all = ones;                                            \
        uint64_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            uint64_t result = (a->element[i] compare b->element[i] ?    \
                               ones : 0x0);                             \
            switch (sizeof(a->element[0])) {                            \
            case 8:                                                     \
                r->u64[i] = result;                                     \
                break;                                                  \
            case 4:                                                     \
                r->u32[i] = result;                                     \
                break;                                                  \
            case 2:                                                     \
                r->u16[i] = result;                                     \
                break;                                                  \
            case 1:                                                     \
                r->u8[i] = result;                                      \
                break;                                                  \
            }                                                           \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMP(suffix, compare, element)          \
    VCMP_DO(suffix, compare, element, 0)        \
    VCMP_DO(suffix##_dot, compare, element, 1)
VCMP(equb, ==, u8)
VCMP(equh, ==, u16)
VCMP(equw, ==, u32)
VCMP(equd, ==, u64)
VCMP(gtub, >, u8)
VCMP(gtuh, >, u16)
VCMP(gtuw, >, u32)
VCMP(gtud, >, u64)
VCMP(gtsb, >, s8)
VCMP(gtsh, >, s16)
VCMP(gtsw, >, s32)
VCMP(gtsd, >, s64)
#undef VCMP_DO
#undef VCMP

#define VCMPNE_DO(suffix, element, etype, cmpzero, record)              \
void helper_vcmpne##suffix(CPUPPCState *env, ppc_avr_t *r,              \
                           ppc_avr_t *a, ppc_avr_t *b)                  \
{                                                                       \
    etype ones = (etype)-1;                                             \
    etype all = ones;                                                   \
    etype result, none = 0;                                             \
    int i;                                                              \
                                                                        \
    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                      \
        if (cmpzero) {                                                  \
            result = ((a->element[i] == 0)                              \
                           || (b->element[i] == 0)                      \
                           || (a->element[i] != b->element[i]) ?        \
                           ones : 0x0);                                 \
        } else {                                                        \
            result = (a->element[i] != b->element[i]) ? ones : 0x0;     \
        }                                                               \
        r->element[i] = result;                                         \
        all &= result;                                                  \
        none |= result;                                                 \
    }                                                                   \
    if (record) {                                                       \
        env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);           \
    }                                                                   \
}

/* VCMPNE/VCMPNEZ - Vector compare not equal (to zero)
 *   suffix  - instruction mnemonic suffix (b: byte, h: halfword, w: word)
 *   element - element type to access from vector
 */
#define VCMPNE(suffix, element, etype, cmpzero)         \
    VCMPNE_DO(suffix, element, etype, cmpzero, 0)       \
    VCMPNE_DO(suffix##_dot, element, etype, cmpzero, 1)
VCMPNE(zb, u8, uint8_t, 1)
VCMPNE(zh, u16, uint16_t, 1)
VCMPNE(zw, u32, uint32_t, 1)
VCMPNE(b, u8, uint8_t, 0)
VCMPNE(h, u16, uint16_t, 0)
VCMPNE(w, u32, uint32_t, 0)
#undef VCMPNE_DO
#undef VCMPNE

#define VCMPFP_DO(suffix, compare, order, record)                       \
    void helper_vcmp##suffix(CPUPPCState *env, ppc_avr_t *r,            \
                             ppc_avr_t *a, ppc_avr_t *b)                \
    {                                                                   \
        uint32_t ones = (uint32_t)-1;                                   \
        uint32_t all = ones;                                            \
        uint32_t none = 0;                                              \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            uint32_t result;                                            \
            int rel = float32_compare_quiet(a->f[i], b->f[i],           \
                                            &env->vec_status);          \
            if (rel == float_relation_unordered) {                      \
                result = 0;                                             \
            } else if (rel compare order) {                             \
                result = ones;                                          \
            } else {                                                    \
                result = 0;                                             \
            }                                                           \
            r->u32[i] = result;                                         \
            all &= result;                                              \
            none |= result;                                             \
        }                                                               \
        if (record) {                                                   \
            env->crf[6] = ((all != 0) << 3) | ((none == 0) << 1);       \
        }                                                               \
    }
#define VCMPFP(suffix, compare, order)          \
    VCMPFP_DO(suffix, compare, order, 0)        \
    VCMPFP_DO(suffix##_dot, compare, order, 1)
VCMPFP(eqfp, ==, float_relation_equal)
VCMPFP(gefp, !=, float_relation_less)
VCMPFP(gtfp, ==, float_relation_greater)
#undef VCMPFP_DO
#undef VCMPFP

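/* vcmpbfp: AltiVec bounds compare.  For each element, bit 31 of the
 * result is set when a > b and bit 30 when a < -b (NaN inputs set both);
 * the record form additionally flags in CR6 whether every element was
 * within bounds.
 */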
static inline void vcmpbfp_internal(CPUPPCState *env, ppc_avr_t *r,
                                    ppc_avr_t *a, ppc_avr_t *b, int record)
{
    int i;
    int all_in = 0;

    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
        int le_rel = float32_compare_quiet(a->f[i], b->f[i], &env->vec_status);
        if (le_rel == float_relation_unordered) {
            r->u32[i] = 0xc0000000;
            all_in = 1;
        } else {
            float32 bneg = float32_chs(b->f[i]);
            int ge_rel = float32_compare_quiet(a->f[i], bneg, &env->vec_status);
            int le = le_rel != float_relation_greater;
            int ge = ge_rel != float_relation_less;

            r->u32[i] = ((!le) << 31) | ((!ge) << 30);
            all_in |= (!le | !ge);
        }
    }
    if (record) {
        env->crf[6] = (all_in == 0) << 1;
    }
}

void helper_vcmpbfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 0);
}

void helper_vcmpbfp_dot(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                        ppc_avr_t *b)
{
    vcmpbfp_internal(env, r, a, b, 1);
}

#define VCT(suffix, satcvt, element)                                    \
    void helper_vct##suffix(CPUPPCState *env, ppc_avr_t *r,             \
                            ppc_avr_t *b, uint32_t uim)                 \
    {                                                                   \
        int i;                                                          \
        int sat = 0;                                                    \
        float_status s = env->vec_status;                               \
                                                                        \
        set_float_rounding_mode(float_round_to_zero, &s);               \
        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
            if (float32_is_any_nan(b->f[i])) {                          \
                r->element[i] = 0;                                      \
            } else {                                                    \
                float64 t = float32_to_float64(b->f[i], &s);            \
                int64_t j;                                              \
                                                                        \
                t = float64_scalbn(t, uim, &s);                         \
                j = float64_to_int64(t, &s);                            \
                r->element[i] = satcvt(j, &sat);                        \
            }                                                           \
        }                                                               \
        if (sat) {                                                      \
            env->vscr |= (1 << VSCR_SAT);                               \
        }                                                               \
    }
VCT(uxs, cvtsduw, u32)
VCT(sxs, cvtsdsw, s32)
#undef VCT

target_ulong helper_vclzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
    VECTOR_FOR_INORDER_I(i, u8) {
        if (r->u8[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

target_ulong helper_vctzlsbb(ppc_avr_t *r)
{
    target_ulong count = 0;
    int i;
#if defined(HOST_WORDS_BIGENDIAN)
    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
#else
    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
#endif
        if (r->u8[i] & 0x01) {
            break;
        }
        count++;
    }
    return count;
}

void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                      ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);

        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmhraddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
{
    int sat = 0;
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i] + 0x00004000;
        int32_t t = (int32_t)c->s16[i] + (prod >> 15);
        r->s16[i] = cvtswsh(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMINMAX_DO(name, compare, element)                              \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
            if (a->element[i] compare b->element[i]) {                  \
                r->element[i] = b->element[i];                          \
            } else {                                                    \
                r->element[i] = a->element[i];                          \
            }                                                           \
        }                                                               \
    }
#define VMINMAX(suffix, element)                \
    VMINMAX_DO(min##suffix, >, element)         \
    VMINMAX_DO(max##suffix, <, element)
VMINMAX(sb, s8)
VMINMAX(sh, s16)
VMINMAX(sw, s32)
VMINMAX(sd, s64)
VMINMAX(ub, u8)
VMINMAX(uh, u16)
VMINMAX(uw, u32)
VMINMAX(ud, u64)
#undef VMINMAX_DO
#undef VMINMAX

void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
{
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        int32_t prod = a->s16[i] * b->s16[i];
        r->s16[i] = (int16_t) (prod + c->s16[i]);
    }
}

#define VMRG_DO(name, element, highp)                                   \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        ppc_avr_t result;                                               \
        int i;                                                          \
        size_t n_elems = ARRAY_SIZE(r->element);                        \
                                                                        \
        for (i = 0; i < n_elems / 2; i++) {                             \
            if (highp) {                                                \
                result.element[i*2+HI_IDX] = a->element[i];             \
                result.element[i*2+LO_IDX] = b->element[i];             \
            } else {                                                    \
                result.element[n_elems - i * 2 - (1 + HI_IDX)] =        \
                    b->element[n_elems - i - 1];                        \
                result.element[n_elems - i * 2 - (1 + LO_IDX)] =        \
                    a->element[n_elems - i - 1];                        \
            }                                                           \
        }                                                               \
        *r = result;                                                    \
    }
#if defined(HOST_WORDS_BIGENDIAN)
#define MRGHI 0
#define MRGLO 1
#else
#define MRGHI 1
#define MRGLO 0
#endif
#define VMRG(suffix, element)                   \
    VMRG_DO(mrgl##suffix, element, MRGHI)       \
    VMRG_DO(mrgh##suffix, element, MRGLO)
VMRG(b, u8)
VMRG(h, u16)
VMRG(w, u32)
#undef VMRG_DO
#undef VMRG
#undef MRGHI
#undef MRGLO

void helper_vmsummbm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s8); i++) {
        prod[i] = (int32_t)a->s8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumshm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        r->s32[i] = c->s32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    int32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->s16); i++) {
        prod[i] = (int32_t)a->s16[i] * b->s16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        int64_t t = (int64_t)c->s32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtsdsw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

void helper_vmsumubm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint16_t prod[16];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
        prod[i] = a->u8[i] * b->u8[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[4 * i] + prod[4 * i + 1] +
            prod[4 * i + 2] + prod[4 * i + 3];
    }
}

void helper_vmsumuhm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, u32) {
        r->u32[i] = c->u32[i] + prod[2 * i] + prod[2 * i + 1];
    }
}

void helper_vmsumuhs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                     ppc_avr_t *b, ppc_avr_t *c)
{
    uint32_t prod[8];
    int i;
    int sat = 0;

    for (i = 0; i < ARRAY_SIZE(r->u16); i++) {
        prod[i] = a->u16[i] * b->u16[i];
    }

    VECTOR_FOR_INORDER_I(i, s32) {
        uint64_t t = (uint64_t)c->u32[i] + prod[2 * i] + prod[2 * i + 1];

        r->u32[i] = cvtuduw(t, &sat);
    }

    if (sat) {
        env->vscr |= (1 << VSCR_SAT);
    }
}

#define VMUL_DO(name, mul_element, prod_element, cast, evenp)           \
    void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)       \
    {                                                                   \
        int i;                                                          \
                                                                        \
        VECTOR_FOR_INORDER_I(i, prod_element) {                         \
            if (evenp) {                                                \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + HI_IDX] *              \
                    (cast)b->mul_element[i * 2 + HI_IDX];               \
            } else {                                                    \
                r->prod_element[i] =                                    \
                    (cast)a->mul_element[i * 2 + LO_IDX] *              \
                    (cast)b->mul_element[i * 2 + LO_IDX];               \
            }                                                           \
        }                                                               \
    }
#define VMUL(suffix, mul_element, prod_element, cast)            \
    VMUL_DO(mule##suffix, mul_element, prod_element, cast, 1)    \
    VMUL_DO(mulo##suffix, mul_element, prod_element, cast, 0)
VMUL(sb, s8, s16, int16_t)
VMUL(sh, s16, s32, int32_t)
VMUL(sw, s32, s64, int64_t)
VMUL(ub, u8, u16, uint16_t)
VMUL(uh, u16, u32, uint32_t)
VMUL(uw, u32, u64, uint64_t)
#undef VMUL_DO
#undef VMUL

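/* vperm selects bytes from the 32-byte concatenation of a and b: the low
 * five bits of each byte of c pick the source byte, with values 16-31
 * taken from b.  helper_vpermr implements the right-indexed variant.
 */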
void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                  ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = s & 0xf;
#else
        int index = 15 - (s & 0xf);
#endif

        if (s & 0x10) {
            result.u8[i] = b->u8[index];
        } else {
            result.u8[i] = a->u8[index];
        }
    }
    *r = result;
}

void helper_vpermr(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
{
    ppc_avr_t result;
    int i;

    VECTOR_FOR_INORDER_I(i, u8) {
        int s = c->u8[i] & 0x1f;
#if defined(HOST_WORDS_BIGENDIAN)
        int index = 15 - (s & 0xf);
#else
        int index = s & 0xf;
#endif

        if (s & 0x10) {
            result.u8[i] = a->u8[index];
        } else {
            result.u8[i] = b->u8[index];
        }
    }
    *r = result;
}

#if defined(HOST_WORDS_BIGENDIAN)
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[(i)])
#define VBPERMD_INDEX(i) (i)
#define VBPERMQ_DW(index) (((index) & 0x40) != 0)
#define EXTRACT_BIT(avr, i, index) (extract64((avr)->u64[i], index, 1))
#else
#define VBPERMQ_INDEX(avr, i) ((avr)->u8[15-(i)])
#define VBPERMD_INDEX(i) (1 - i)
#define VBPERMQ_DW(index) (((index) & 0x40) == 0)
#define EXTRACT_BIT(avr, i, index) \
        (extract64((avr)->u64[1 - i], 63 - index, 1))
#endif

void helper_vbpermd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i, j;
    ppc_avr_t result = { .u64 = { 0, 0 } };
    VECTOR_FOR_INORDER_I(i, u64) {
        for (j = 0; j < 8; j++) {
            int index = VBPERMQ_INDEX(b, (i * 8) + j);
            if (index < 64 && EXTRACT_BIT(a, i, index)) {
                result.u64[VBPERMD_INDEX(i)] |= (0x80 >> j);
            }
        }
    }
    *r = result;
}

void helper_vbpermq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
{
    int i;
    uint64_t perm = 0;

    VECTOR_FOR_INORDER_I(i, u8) {
        int index = VBPERMQ_INDEX(b, i);

        if (index < 128) {
            uint64_t mask = (1ull << (63 - (index & 0x3F)));
            if (a->u64[VBPERMQ_DW(index)] & mask) {
                perm |= (0x8000 >> i);
            }
        }
    }

    r->u64[HI_IDX] = perm;
    r->u64[LO_IDX] = 0;
}

#undef VBPERMQ_INDEX
#undef VBPERMQ_DW

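/* Lookup table for vgbbd (Vector Gather Bits by Bytes by Doubleword):
 * entry i spreads the eight bits of i so that bit j of i lands in the
 * MSB of byte j of the 64-bit entry.  Shifted copies of these entries
 * can then be ORed together to transpose the 8x8 bit matrix held in
 * each doubleword.
 */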
1291static const uint64_t VGBBD_MASKS[256] = {
1292    0x0000000000000000ull, /* 00 */
1293    0x0000000000000080ull, /* 01 */
1294    0x0000000000008000ull, /* 02 */
1295    0x0000000000008080ull, /* 03 */
1296    0x0000000000800000ull, /* 04 */
1297    0x0000000000800080ull, /* 05 */
1298    0x0000000000808000ull, /* 06 */
1299    0x0000000000808080ull, /* 07 */
1300    0x0000000080000000ull, /* 08 */
1301    0x0000000080000080ull, /* 09 */
1302    0x0000000080008000ull, /* 0A */
1303    0x0000000080008080ull, /* 0B */
1304    0x0000000080800000ull, /* 0C */
1305    0x0000000080800080ull, /* 0D */
1306    0x0000000080808000ull, /* 0E */
1307    0x0000000080808080ull, /* 0F */
1308    0x0000008000000000ull, /* 10 */
1309    0x0000008000000080ull, /* 11 */
1310    0x0000008000008000ull, /* 12 */
1311    0x0000008000008080ull, /* 13 */
1312    0x0000008000800000ull, /* 14 */
1313    0x0000008000800080ull, /* 15 */
1314    0x0000008000808000ull, /* 16 */
1315    0x0000008000808080ull, /* 17 */
1316    0x0000008080000000ull, /* 18 */
1317    0x0000008080000080ull, /* 19 */
1318    0x0000008080008000ull, /* 1A */
1319    0x0000008080008080ull, /* 1B */
1320    0x0000008080800000ull, /* 1C */
1321    0x0000008080800080ull, /* 1D */
1322    0x0000008080808000ull, /* 1E */
1323    0x0000008080808080ull, /* 1F */
1324    0x0000800000000000ull, /* 20 */
1325    0x0000800000000080ull, /* 21 */
1326    0x0000800000008000ull, /* 22 */
1327    0x0000800000008080ull, /* 23 */
1328    0x0000800000800000ull, /* 24 */
1329    0x0000800000800080ull, /* 25 */
1330    0x0000800000808000ull, /* 26 */
1331    0x0000800000808080ull, /* 27 */
1332    0x0000800080000000ull, /* 28 */
1333    0x0000800080000080ull, /* 29 */
1334    0x0000800080008000ull, /* 2A */
1335    0x0000800080008080ull, /* 2B */
1336    0x0000800080800000ull, /* 2C */
1337    0x0000800080800080ull, /* 2D */
1338    0x0000800080808000ull, /* 2E */
1339    0x0000800080808080ull, /* 2F */
1340    0x0000808000000000ull, /* 30 */
1341    0x0000808000000080ull, /* 31 */
1342    0x0000808000008000ull, /* 32 */
1343    0x0000808000008080ull, /* 33 */
1344    0x0000808000800000ull, /* 34 */
1345    0x0000808000800080ull, /* 35 */
1346    0x0000808000808000ull, /* 36 */
1347    0x0000808000808080ull, /* 37 */
1348    0x0000808080000000ull, /* 38 */
1349    0x0000808080000080ull, /* 39 */
1350    0x0000808080008000ull, /* 3A */
1351    0x0000808080008080ull, /* 3B */
1352    0x0000808080800000ull, /* 3C */
1353    0x0000808080800080ull, /* 3D */
1354    0x0000808080808000ull, /* 3E */
1355    0x0000808080808080ull, /* 3F */
1356    0x0080000000000000ull, /* 40 */
1357    0x0080000000000080ull, /* 41 */
1358    0x0080000000008000ull, /* 42 */
1359    0x0080000000008080ull, /* 43 */
1360    0x0080000000800000ull, /* 44 */
1361    0x0080000000800080ull, /* 45 */
1362    0x0080000000808000ull, /* 46 */
1363    0x0080000000808080ull, /* 47 */
1364    0x0080000080000000ull, /* 48 */
1365    0x0080000080000080ull, /* 49 */
1366    0x0080000080008000ull, /* 4A */
1367    0x0080000080008080ull, /* 4B */
1368    0x0080000080800000ull, /* 4C */
1369    0x0080000080800080ull, /* 4D */
1370    0x0080000080808000ull, /* 4E */
1371    0x0080000080808080ull, /* 4F */
1372    0x0080008000000000ull, /* 50 */
1373    0x0080008000000080ull, /* 51 */
1374    0x0080008000008000ull, /* 52 */
1375    0x0080008000008080ull, /* 53 */
1376    0x0080008000800000ull, /* 54 */
1377    0x0080008000800080ull, /* 55 */
1378    0x0080008000808000ull, /* 56 */
1379    0x0080008000808080ull, /* 57 */
1380    0x0080008080000000ull, /* 58 */
1381    0x0080008080000080ull, /* 59 */
1382    0x0080008080008000ull, /* 5A */
1383    0x0080008080008080ull, /* 5B */
1384    0x0080008080800000ull, /* 5C */
1385    0x0080008080800080ull, /* 5D */
1386    0x0080008080808000ull, /* 5E */
1387    0x0080008080808080ull, /* 5F */
1388    0x0080800000000000ull, /* 60 */
1389    0x0080800000000080ull, /* 61 */
1390    0x0080800000008000ull, /* 62 */
1391    0x0080800000008080ull, /* 63 */
1392    0x0080800000800000ull, /* 64 */
1393    0x0080800000800080ull, /* 65 */
1394    0x0080800000808000ull, /* 66 */
1395    0x0080800000808080ull, /* 67 */
1396    0x0080800080000000ull, /* 68 */
1397    0x0080800080000080ull, /* 69 */
1398    0x0080800080008000ull, /* 6A */
1399    0x0080800080008080ull, /* 6B */
1400    0x0080800080800000ull, /* 6C */
1401    0x0080800080800080ull, /* 6D */
1402    0x0080800080808000ull, /* 6E */
1403    0x0080800080808080ull, /* 6F */
1404    0x0080808000000000ull, /* 70 */
1405    0x0080808000000080ull, /* 71 */
1406    0x0080808000008000ull, /* 72 */
1407    0x0080808000008080ull, /* 73 */
1408    0x0080808000800000ull, /* 74 */
1409    0x0080808000800080ull, /* 75 */
1410    0x0080808000808000ull, /* 76 */
1411    0x0080808000808080ull, /* 77 */
1412    0x0080808080000000ull, /* 78 */
1413    0x0080808080000080ull, /* 79 */
1414    0x0080808080008000ull, /* 7A */
1415    0x0080808080008080ull, /* 7B */
1416    0x0080808080800000ull, /* 7C */
1417    0x0080808080800080ull, /* 7D */
1418    0x0080808080808000ull, /* 7E */
1419    0x0080808080808080ull, /* 7F */
1420    0x8000000000000000ull, /* 80 */
1421    0x8000000000000080ull, /* 81 */
1422    0x8000000000008000ull, /* 82 */
1423    0x8000000000008080ull, /* 83 */
1424    0x8000000000800000ull, /* 84 */
1425    0x8000000000800080ull, /* 85 */
1426    0x8000000000808000ull, /* 86 */
1427    0x8000000000808080ull, /* 87 */
1428    0x8000000080000000ull, /* 88 */
1429    0x8000000080000080ull, /* 89 */
1430    0x8000000080008000ull, /* 8A */
1431    0x8000000080008080ull, /* 8B */
1432    0x8000000080800000ull, /* 8C */
1433    0x8000000080800080ull, /* 8D */
1434    0x8000000080808000ull, /* 8E */
1435    0x8000000080808080ull, /* 8F */
1436    0x8000008000000000ull, /* 90 */
1437    0x8000008000000080ull, /* 91 */
1438    0x8000008000008000ull, /* 92 */
1439    0x8000008000008080ull, /* 93 */
1440    0x8000008000800000ull, /* 94 */
1441    0x8000008000800080ull, /* 95 */
1442    0x8000008000808000ull, /* 96 */
1443    0x8000008000808080ull, /* 97 */
1444    0x8000008080000000ull, /* 98 */
1445    0x8000008080000080ull, /* 99 */
1446    0x8000008080008000ull, /* 9A */
1447    0x8000008080008080ull, /* 9B */
1448    0x8000008080800000ull, /* 9C */
1449    0x8000008080800080ull, /* 9D */
1450    0x8000008080808000ull, /* 9E */
1451    0x8000008080808080ull, /* 9F */
1452    0x8000800000000000ull, /* A0 */
1453    0x8000800000000080ull, /* A1 */
1454    0x8000800000008000ull, /* A2 */
1455    0x8000800000008080ull, /* A3 */
1456    0x8000800000800000ull, /* A4 */
1457    0x8000800000800080ull, /* A5 */
1458    0x8000800000808000ull, /* A6 */
1459    0x8000800000808080ull, /* A7 */
1460    0x8000800080000000ull, /* A8 */
1461    0x8000800080000080ull, /* A9 */
1462    0x8000800080008000ull, /* AA */
1463    0x8000800080008080ull, /* AB */
1464    0x8000800080800000ull, /* AC */
1465    0x8000800080800080ull, /* AD */
1466    0x8000800080808000ull, /* AE */
1467    0x8000800080808080ull, /* AF */
1468    0x8000808000000000ull, /* B0 */
1469    0x8000808000000080ull, /* B1 */
1470    0x8000808000008000ull, /* B2 */
1471    0x8000808000008080ull, /* B3 */
1472    0x8000808000800000ull, /* B4 */
1473    0x8000808000800080ull, /* B5 */
1474    0x8000808000808000ull, /* B6 */
1475    0x8000808000808080ull, /* B7 */
1476    0x8000808080000000ull, /* B8 */
1477    0x8000808080000080ull, /* B9 */
1478    0x8000808080008000ull, /* BA */
1479    0x8000808080008080ull, /* BB */
1480    0x8000808080800000ull, /* BC */
1481    0x8000808080800080ull, /* BD */
1482    0x8000808080808000ull, /* BE */
1483    0x8000808080808080ull, /* BF */
1484    0x8080000000000000ull, /* C0 */
1485    0x8080000000000080ull, /* C1 */
1486    0x8080000000008000ull, /* C2 */
1487    0x8080000000008080ull, /* C3 */
1488    0x8080000000800000ull, /* C4 */
1489    0x8080000000800080ull, /* C5 */
1490    0x8080000000808000ull, /* C6 */
1491    0x8080000000808080ull, /* C7 */
1492    0x8080000080000000ull, /* C8 */
1493    0x8080000080000080ull, /* C9 */
1494    0x8080000080008000ull, /* CA */
1495    0x8080000080008080ull, /* CB */
1496    0x8080000080800000ull, /* CC */
1497    0x8080000080800080ull, /* CD */
1498    0x8080000080808000ull, /* CE */
1499    0x8080000080808080ull, /* CF */
1500    0x8080008000000000ull, /* D0 */
1501    0x8080008000000080ull, /* D1 */
1502    0x8080008000008000ull, /* D2 */
1503    0x8080008000008080ull, /* D3 */
1504    0x8080008000800000ull, /* D4 */
1505    0x8080008000800080ull, /* D5 */
1506    0x8080008000808000ull, /* D6 */
1507    0x8080008000808080ull, /* D7 */
1508    0x8080008080000000ull, /* D8 */
1509    0x8080008080000080ull, /* D9 */
1510    0x8080008080008000ull, /* DA */
1511    0x8080008080008080ull, /* DB */
1512    0x8080008080800000ull, /* DC */
1513    0x8080008080800080ull, /* DD */
1514    0x8080008080808000ull, /* DE */
1515    0x8080008080808080ull, /* DF */
1516    0x8080800000000000ull, /* E0 */
1517    0x8080800000000080ull, /* E1 */
1518    0x8080800000008000ull, /* E2 */
1519    0x8080800000008080ull, /* E3 */
1520    0x8080800000800000ull, /* E4 */
1521    0x8080800000800080ull, /* E5 */
1522    0x8080800000808000ull, /* E6 */
1523    0x8080800000808080ull, /* E7 */
1524    0x8080800080000000ull, /* E8 */
1525    0x8080800080000080ull, /* E9 */
1526    0x8080800080008000ull, /* EA */
1527    0x8080800080008080ull, /* EB */
1528    0x8080800080800000ull, /* EC */
1529    0x8080800080800080ull, /* ED */
1530    0x8080800080808000ull, /* EE */
1531    0x8080800080808080ull, /* EF */
1532    0x8080808000000000ull, /* F0 */
1533    0x8080808000000080ull, /* F1 */
1534    0x8080808000008000ull, /* F2 */
1535    0x8080808000008080ull, /* F3 */
1536    0x8080808000800000ull, /* F4 */
1537    0x8080808000800080ull, /* F5 */
1538    0x8080808000808000ull, /* F6 */
1539    0x8080808000808080ull, /* F7 */
1540    0x8080808080000000ull, /* F8 */
1541    0x8080808080000080ull, /* F9 */
1542    0x8080808080008000ull, /* FA */
1543    0x8080808080008080ull, /* FB */
1544    0x8080808080800000ull, /* FC */
1545    0x8080808080800080ull, /* FD */
1546    0x8080808080808000ull, /* FE */
1547    0x8080808080808080ull, /* FF */
1548};
1549
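    /* vgbbd (Vector Gather Bits by Bytes by Doubleword) transposes each
     * doubleword viewed as an 8x8 bit matrix, using the mask table above
     * to spread the bits of each source byte across the eight result
     * bytes.  A rough illustration (architectural values, ignoring host
     * byte-order details): if source byte 0 is 0xFF and the other bytes
     * are zero, every result byte of that doubleword becomes 0x80, i.e.
     * the eight bits of byte 0 land in the top bit of each result byte.
     */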
1550void helper_vgbbd(ppc_avr_t *r, ppc_avr_t *b)
1551{
1552    int i;
1553    uint64_t t[2] = { 0, 0 };
1554
1555    VECTOR_FOR_INORDER_I(i, u8) {
1556#if defined(HOST_WORDS_BIGENDIAN)
1557        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (i & 7);
1558#else
1559        t[i>>3] |= VGBBD_MASKS[b->u8[i]] >> (7-(i & 7));
1560#endif
1561    }
1562
1563    r->u64[0] = t[0];
1564    r->u64[1] = t[1];
1565}
1566
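    /* Polynomial (carry-less) multiply-sum: each element product is a
     * multiplication over GF(2), i.e. the partial products are combined
     * with XOR rather than addition.  A small worked example, for
     * illustration only: 0x3 * 0x3 = (x + 1)^2 = x^2 + 1 = 0x5, not 9,
     * because the two cross terms cancel under XOR.
     */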
1567#define PMSUM(name, srcfld, trgfld, trgtyp)                   \
1568void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)  \
1569{                                                             \
1570    int i, j;                                                 \
1571    trgtyp prod[sizeof(ppc_avr_t)/sizeof(a->srcfld[0])];      \
1572                                                              \
1573    VECTOR_FOR_INORDER_I(i, srcfld) {                         \
1574        prod[i] = 0;                                          \
1575        for (j = 0; j < sizeof(a->srcfld[0]) * 8; j++) {      \
1576            if (a->srcfld[i] & (1ull<<j)) {                   \
1577                prod[i] ^= ((trgtyp)b->srcfld[i] << j);       \
1578            }                                                 \
1579        }                                                     \
1580    }                                                         \
1581                                                              \
1582    VECTOR_FOR_INORDER_I(i, trgfld) {                         \
1583        r->trgfld[i] = prod[2*i] ^ prod[2*i+1];               \
1584    }                                                         \
1585}
1586
1587PMSUM(vpmsumb, u8, u16, uint16_t)
1588PMSUM(vpmsumh, u16, u32, uint32_t)
1589PMSUM(vpmsumw, u32, u64, uint64_t)
1590
1591void helper_vpmsumd(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1592{
1594#ifdef CONFIG_INT128
1595    int i, j;
1596    __uint128_t prod[2];
1597
1598    VECTOR_FOR_INORDER_I(i, u64) {
1599        prod[i] = 0;
1600        for (j = 0; j < 64; j++) {
1601            if (a->u64[i] & (1ull<<j)) {
1602                prod[i] ^= (((__uint128_t)b->u64[i]) << j);
1603            }
1604        }
1605    }
1606
1607    r->u128 = prod[0] ^ prod[1];
1608
1609#else
1610    int i, j;
1611    ppc_avr_t prod[2];
1612
1613    VECTOR_FOR_INORDER_I(i, u64) {
1614        prod[i].u64[LO_IDX] = prod[i].u64[HI_IDX] = 0;
1615        for (j = 0; j < 64; j++) {
1616            if (a->u64[i] & (1ull<<j)) {
1617                ppc_avr_t bshift;
1618                if (j == 0) {
1619                    bshift.u64[HI_IDX] = 0;
1620                    bshift.u64[LO_IDX] = b->u64[i];
1621                } else {
1622                    bshift.u64[HI_IDX] = b->u64[i] >> (64-j);
1623                    bshift.u64[LO_IDX] = b->u64[i] << j;
1624                }
1625                prod[i].u64[LO_IDX] ^= bshift.u64[LO_IDX];
1626                prod[i].u64[HI_IDX] ^= bshift.u64[HI_IDX];
1627            }
1628        }
1629    }
1630
1631    r->u64[LO_IDX] = prod[0].u64[LO_IDX] ^ prod[1].u64[LO_IDX];
1632    r->u64[HI_IDX] = prod[0].u64[HI_IDX] ^ prod[1].u64[HI_IDX];
1633#endif
1634}
1635
1636
1637#if defined(HOST_WORDS_BIGENDIAN)
1638#define PKBIG 1
1639#else
1640#define PKBIG 0
1641#endif
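    /* vpkpx packs 8:8:8:8 pixels down to 1:5:5:5: each 32-bit source
     * pixel keeps the low bit of its top byte and the high five bits of
     * each of the three colour bytes.
     */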
1642void helper_vpkpx(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1643{
1644    int i, j;
1645    ppc_avr_t result;
1646#if defined(HOST_WORDS_BIGENDIAN)
1647    const ppc_avr_t *x[2] = { a, b };
1648#else
1649    const ppc_avr_t *x[2] = { b, a };
1650#endif
1651
1652    VECTOR_FOR_INORDER_I(i, u64) {
1653        VECTOR_FOR_INORDER_I(j, u32) {
1654            uint32_t e = x[i]->u32[j];
1655
1656            result.u16[4*i+j] = (((e >> 9) & 0xfc00) |
1657                                 ((e >> 6) & 0x3e0) |
1658                                 ((e >> 3) & 0x1f));
1659        }
1660    }
1661    *r = result;
1662}
1663
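    /* Pack with (optional) saturation: each source element is narrowed
     * to the target width, and the saturating forms clamp out-of-range
     * values and record the clamp in VSCR[SAT].  For example vpkswss
     * (s32 -> s16) turns 0x00012345 into 0x7fff and -0x00012345 into
     * 0x8000 (-32768).
     */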
1664#define VPK(suffix, from, to, cvt, dosat)                               \
1665    void helper_vpk##suffix(CPUPPCState *env, ppc_avr_t *r,             \
1666                            ppc_avr_t *a, ppc_avr_t *b)                 \
1667    {                                                                   \
1668        int i;                                                          \
1669        int sat = 0;                                                    \
1670        ppc_avr_t result;                                               \
1671        ppc_avr_t *a0 = PKBIG ? a : b;                                  \
1672        ppc_avr_t *a1 = PKBIG ? b : a;                                  \
1673                                                                        \
1674        VECTOR_FOR_INORDER_I(i, from) {                                 \
1675            result.to[i] = cvt(a0->from[i], &sat);                      \
1676            result.to[i+ARRAY_SIZE(r->from)] = cvt(a1->from[i], &sat);  \
1677        }                                                               \
1678        *r = result;                                                    \
1679        if (dosat && sat) {                                             \
1680            env->vscr |= (1 << VSCR_SAT);                               \
1681        }                                                               \
1682    }
1683#define I(x, y) (x)
1684VPK(shss, s16, s8, cvtshsb, 1)
1685VPK(shus, s16, u8, cvtshub, 1)
1686VPK(swss, s32, s16, cvtswsh, 1)
1687VPK(swus, s32, u16, cvtswuh, 1)
1688VPK(sdss, s64, s32, cvtsdsw, 1)
1689VPK(sdus, s64, u32, cvtsduw, 1)
1690VPK(uhus, u16, u8, cvtuhub, 1)
1691VPK(uwus, u32, u16, cvtuwuh, 1)
1692VPK(udus, u64, u32, cvtuduw, 1)
1693VPK(uhum, u16, u8, I, 0)
1694VPK(uwum, u32, u16, I, 0)
1695VPK(udum, u64, u32, I, 0)
1696#undef I
1697#undef VPK
1698#undef PKBIG
1699
1700void helper_vrefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1701{
1702    int i;
1703
1704    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1705        r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
1706    }
1707}
1708
1709#define VRFI(suffix, rounding)                                  \
1710    void helper_vrfi##suffix(CPUPPCState *env, ppc_avr_t *r,    \
1711                             ppc_avr_t *b)                      \
1712    {                                                           \
1713        int i;                                                  \
1714        float_status s = env->vec_status;                       \
1715                                                                \
1716        set_float_rounding_mode(rounding, &s);                  \
1717        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
1718            r->f[i] = float32_round_to_int(b->f[i], &s);        \
1719        }                                                       \
1720    }
1721VRFI(n, float_round_nearest_even)
1722VRFI(m, float_round_down)
1723VRFI(p, float_round_up)
1724VRFI(z, float_round_to_zero)
1725#undef VRFI
1726
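    /* Rotate left by the masked shift count.  The complementary right
     * shift count is masked the same way, so that a rotate by zero does
     * not shift right by the full element width (which would be
     * undefined behaviour in C).
     */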
1727#define VROTATE(suffix, element, mask)                                  \
1728    void helper_vrl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1729    {                                                                   \
1730        int i;                                                          \
1731                                                                        \
1732        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1733            unsigned int shift = b->element[i] & mask;                  \
1734            r->element[i] = (a->element[i] << shift) |                  \
1735                (a->element[i] >> ((sizeof(a->element[0]) * 8 - shift) & mask)); \
1736        }                                                               \
1737    }
1738VROTATE(b, u8, 0x7)
1739VROTATE(h, u16, 0xF)
1740VROTATE(w, u32, 0x1F)
1741VROTATE(d, u64, 0x3F)
1742#undef VROTATE
1743
1744void helper_vrsqrtefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1745{
1746    int i;
1747
1748    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1749        float32 t = float32_sqrt(b->f[i], &env->vec_status);
1750
1751        r->f[i] = float32_div(float32_one, t, &env->vec_status);
1752    }
1753}
1754
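    /* Rotate-left then mask: the rotate count, mask end and mask begin
     * are taken from bit fields of src2; the "mi" forms insert the
     * rotated bits under the mask into the previous target contents,
     * while the "nm" forms clear everything outside the mask.
     */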
1755#define VRLMI(name, size, element, insert)                            \
1756void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)          \
1757{                                                                     \
1758    int i;                                                            \
1759    for (i = 0; i < ARRAY_SIZE(r->element); i++) {                    \
1760        uint##size##_t src1 = a->element[i];                          \
1761        uint##size##_t src2 = b->element[i];                          \
1762        uint##size##_t src3 = r->element[i];                          \
1763        uint##size##_t begin, end, shift, mask, rot_val;              \
1764                                                                      \
1765        shift = extract##size(src2, 0, 6);                            \
1766        end   = extract##size(src2, 8, 6);                            \
1767        begin = extract##size(src2, 16, 6);                           \
1768        rot_val = rol##size(src1, shift);                             \
1769        mask = mask_u##size(begin, end);                              \
1770        if (insert) {                                                 \
1771            r->element[i] = (rot_val & mask) | (src3 & ~mask);        \
1772        } else {                                                      \
1773            r->element[i] = (rot_val & mask);                         \
1774        }                                                             \
1775    }                                                                 \
1776}
1777
1778VRLMI(vrldmi, 64, u64, 1);
1779VRLMI(vrlwmi, 32, u32, 1);
1780VRLMI(vrldnm, 64, u64, 0);
1781VRLMI(vrlwnm, 32, u32, 0);
1782
1783void helper_vsel(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
1784                 ppc_avr_t *c)
1785{
1786    r->u64[0] = (a->u64[0] & ~c->u64[0]) | (b->u64[0] & c->u64[0]);
1787    r->u64[1] = (a->u64[1] & ~c->u64[1]) | (b->u64[1] & c->u64[1]);
1788}
1789
1790void helper_vexptefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1791{
1792    int i;
1793
1794    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1795        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
1796    }
1797}
1798
1799void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *b)
1800{
1801    int i;
1802
1803    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
1804        r->f[i] = float32_log2(b->f[i], &env->vec_status);
1805    }
1806}
1807
1808/* The specification says that the results are undefined unless all of the
1809 * shift counts are identical.  We check that they are, to conform to what
1810 * real hardware appears to do.  */
1811#define VSHIFT(suffix, leftp)                                           \
1812    void helper_vs##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)    \
1813    {                                                                   \
1814        int shift = b->u8[LO_IDX*15] & 0x7;                             \
1815        int doit = 1;                                                   \
1816        int i;                                                          \
1817                                                                        \
1818        for (i = 0; i < ARRAY_SIZE(r->u8); i++) {                       \
1819            doit = doit && ((b->u8[i] & 0x7) == shift);                 \
1820        }                                                               \
1821        if (doit) {                                                     \
1822            if (shift == 0) {                                           \
1823                *r = *a;                                                \
1824            } else if (leftp) {                                         \
1825                uint64_t carry = a->u64[LO_IDX] >> (64 - shift);        \
1826                                                                        \
1827                r->u64[HI_IDX] = (a->u64[HI_IDX] << shift) | carry;     \
1828                r->u64[LO_IDX] = a->u64[LO_IDX] << shift;               \
1829            } else {                                                    \
1830                uint64_t carry = a->u64[HI_IDX] << (64 - shift);        \
1831                                                                        \
1832                r->u64[LO_IDX] = (a->u64[LO_IDX] >> shift) | carry;     \
1833                r->u64[HI_IDX] = a->u64[HI_IDX] >> shift;               \
1834            }                                                           \
1835        }                                                               \
1836    }
1837VSHIFT(l, 1)
1838VSHIFT(r, 0)
1839#undef VSHIFT
1840
1841#define VSL(suffix, element, mask)                                      \
1842    void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
1843    {                                                                   \
1844        int i;                                                          \
1845                                                                        \
1846        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1847            unsigned int shift = b->element[i] & mask;                  \
1848                                                                        \
1849            r->element[i] = a->element[i] << shift;                     \
1850        }                                                               \
1851    }
1852VSL(b, u8, 0x7)
1853VSL(h, u16, 0x0F)
1854VSL(w, u32, 0x1F)
1855VSL(d, u64, 0x3F)
1856#undef VSL
1857
1858void helper_vslv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1859{
1860    int i;
1861    unsigned int shift, bytes, size;
1862
1863    size = ARRAY_SIZE(r->u8);
1864    for (i = 0; i < size; i++) {
1865        shift = b->u8[i] & 0x7;             /* extract shift value */
1866        bytes = (a->u8[i] << 8) +             /* extract adjacent bytes */
1867            (((i + 1) < size) ? a->u8[i + 1] : 0);
1868        r->u8[i] = (bytes << shift) >> 8;   /* shift and store result */
1869    }
1870}
1871
1872void helper_vsrv(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1873{
1874    int i;
1875    unsigned int shift, bytes;
1876
1877    /* Work in reverse order, since the destination and source registers
1878     * may be the same.  The result is built in place without a temporary,
1879     * so reverse order guarantees that computed bytes are not fed back.
1880     */
1881    for (i = ARRAY_SIZE(r->u8) - 1; i >= 0; i--) {
1882        shift = b->u8[i] & 0x7;                 /* extract shift value */
1883        bytes = ((i ? a->u8[i - 1] : 0) << 8) + a->u8[i];
1884                                                /* extract adjacent bytes */
1885        r->u8[i] = (bytes >> shift) & 0xFF;     /* shift and store result */
1886    }
1887}
1888
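    /* vsldoi: the 32-byte concatenation a || b is shifted left by
     * 'shift' bytes and the leftmost 16 bytes are kept; e.g. with
     * shift = 1 the result is bytes 1..15 of a followed by byte 0 of b.
     */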
1889void helper_vsldoi(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t shift)
1890{
1891    int sh = shift & 0xf;
1892    int i;
1893    ppc_avr_t result;
1894
1895#if defined(HOST_WORDS_BIGENDIAN)
1896    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1897        int index = sh + i;
1898        if (index > 0xf) {
1899            result.u8[i] = b->u8[index - 0x10];
1900        } else {
1901            result.u8[i] = a->u8[index];
1902        }
1903    }
1904#else
1905    for (i = 0; i < ARRAY_SIZE(r->u8); i++) {
1906        int index = (16 - sh) + i;
1907        if (index > 0xf) {
1908            result.u8[i] = a->u8[index - 0x10];
1909        } else {
1910            result.u8[i] = b->u8[index];
1911        }
1912    }
1913#endif
1914    *r = result;
1915}
1916
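    /* vslo/vsro shift the 128-bit register by whole bytes, taken from
     * bits 121:124 of b, and compose with vsl/vsr (which handle the
     * remaining 0-7 bits) into a full 0-127 bit shift; an illustrative
     * sequence is "vslo vT,vA,vB" followed by "vsl vT,vT,vB".
     */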
1917void helper_vslo(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
1918{
1919    int sh = (b->u8[LO_IDX*0xf] >> 3) & 0xf;
1920
1921#if defined(HOST_WORDS_BIGENDIAN)
1922    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
1923    memset(&r->u8[16-sh], 0, sh);
1924#else
1925    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
1926    memset(&r->u8[0], 0, sh);
1927#endif
1928}
1929
1930/* Experimental testing shows that hardware masks the immediate.  */
1931#define _SPLAT_MASKED(element) (splat & (ARRAY_SIZE(r->element) - 1))
1932#if defined(HOST_WORDS_BIGENDIAN)
1933#define SPLAT_ELEMENT(element) _SPLAT_MASKED(element)
1934#else
1935#define SPLAT_ELEMENT(element)                                  \
1936    (ARRAY_SIZE(r->element) - 1 - _SPLAT_MASKED(element))
1937#endif
1938#define VSPLT(suffix, element)                                          \
1939    void helper_vsplt##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t splat) \
1940    {                                                                   \
1941        uint32_t s = b->element[SPLAT_ELEMENT(element)];                \
1942        int i;                                                          \
1943                                                                        \
1944        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
1945            r->element[i] = s;                                          \
1946        }                                                               \
1947    }
1948VSPLT(b, u8)
1949VSPLT(h, u16)
1950VSPLT(w, u32)
1951#undef VSPLT
1952#undef SPLAT_ELEMENT
1953#undef _SPLAT_MASKED
1954#if defined(HOST_WORDS_BIGENDIAN)
1955#define VINSERT(suffix, element)                                            \
1956    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1957    {                                                                       \
1958        memmove(&r->u8[index], &b->u8[8 - sizeof(r->element[0])],           \
1959                sizeof(r->element[0]));                                     \
1960    }
1961#else
1962#define VINSERT(suffix, element)                                            \
1963    void helper_vinsert##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1964    {                                                                       \
1965        uint32_t d = (16 - index) - sizeof(r->element[0]);                  \
1966        memmove(&r->u8[d], &b->u8[8], sizeof(r->element[0]));               \
1967    }
1968#endif
1969VINSERT(b, u8)
1970VINSERT(h, u16)
1971VINSERT(w, u32)
1972VINSERT(d, u64)
1973#undef VINSERT
1974#if defined(HOST_WORDS_BIGENDIAN)
1975#define VEXTRACT(suffix, element)                                            \
1976    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1977    {                                                                        \
1978        uint32_t es = sizeof(r->element[0]);                                 \
1979        memmove(&r->u8[8 - es], &b->u8[index], es);                          \
1980        memset(&r->u8[8], 0, 8);                                             \
1981        memset(&r->u8[0], 0, 8 - es);                                        \
1982    }
1983#else
1984#define VEXTRACT(suffix, element)                                            \
1985    void helper_vextract##suffix(ppc_avr_t *r, ppc_avr_t *b, uint32_t index) \
1986    {                                                                        \
1987        uint32_t es = sizeof(r->element[0]);                                 \
1988        uint32_t s = (16 - index) - es;                                      \
1989        memmove(&r->u8[8], &b->u8[s], es);                                   \
1990        memset(&r->u8[0], 0, 8);                                             \
1991        memset(&r->u8[8 + es], 0, 8 - es);                                   \
1992    }
1993#endif
1994VEXTRACT(ub, u8)
1995VEXTRACT(uh, u16)
1996VEXTRACT(uw, u32)
1997VEXTRACT(d, u64)
1998#undef VEXTRACT
1999
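    /* Sign-extend the low byte/halfword/word of each target-sized
     * element, e.g. vextsb2w turns the s32 element 0x000000ff into
     * 0xffffffff (-1).
     */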
2000#define VEXT_SIGNED(name, element, mask, cast, recast)              \
2001void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
2002{                                                                   \
2003    int i;                                                          \
2004    VECTOR_FOR_INORDER_I(i, element) {                              \
2005        r->element[i] = (recast)((cast)(b->element[i] & mask));     \
2006    }                                                               \
2007}
2008VEXT_SIGNED(vextsb2w, s32, UINT8_MAX, int8_t, int32_t)
2009VEXT_SIGNED(vextsb2d, s64, UINT8_MAX, int8_t, int64_t)
2010VEXT_SIGNED(vextsh2w, s32, UINT16_MAX, int16_t, int32_t)
2011VEXT_SIGNED(vextsh2d, s64, UINT16_MAX, int16_t, int64_t)
2012VEXT_SIGNED(vextsw2d, s64, UINT32_MAX, int32_t, int64_t)
2013#undef VEXT_SIGNED
2014
2015#define VNEG(name, element)                                         \
2016void helper_##name(ppc_avr_t *r, ppc_avr_t *b)                      \
2017{                                                                   \
2018    int i;                                                          \
2019    VECTOR_FOR_INORDER_I(i, element) {                              \
2020        r->element[i] = -b->element[i];                             \
2021    }                                                               \
2022}
2023VNEG(vnegw, s32)
2024VNEG(vnegd, s64)
2025#undef VNEG
2026
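    /* The 5-bit SIMM arrives in the low bits of 'splat'; shifting it up
     * into an int8_t and back down sign-extends it.  For example
     * splat = 0x1f gives (int8_t)0xf8 >> 3 = -1.
     */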
2027#define VSPLTI(suffix, element, splat_type)                     \
2028    void helper_vspltis##suffix(ppc_avr_t *r, uint32_t splat)   \
2029    {                                                           \
2030        splat_type x = (int8_t)(splat << 3) >> 3;               \
2031        int i;                                                  \
2032                                                                \
2033        for (i = 0; i < ARRAY_SIZE(r->element); i++) {          \
2034            r->element[i] = x;                                  \
2035        }                                                       \
2036    }
2037VSPLTI(b, s8, int8_t)
2038VSPLTI(h, s16, int16_t)
2039VSPLTI(w, s32, int32_t)
2040#undef VSPLTI
2041
2042#define VSR(suffix, element, mask)                                      \
2043    void helper_vsr##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
2044    {                                                                   \
2045        int i;                                                          \
2046                                                                        \
2047        for (i = 0; i < ARRAY_SIZE(r->element); i++) {                  \
2048            unsigned int shift = b->element[i] & mask;                  \
2049            r->element[i] = a->element[i] >> shift;                     \
2050        }                                                               \
2051    }
2052VSR(ab, s8, 0x7)
2053VSR(ah, s16, 0xF)
2054VSR(aw, s32, 0x1F)
2055VSR(ad, s64, 0x3F)
2056VSR(b, u8, 0x7)
2057VSR(h, u16, 0xF)
2058VSR(w, u32, 0x1F)
2059VSR(d, u64, 0x3F)
2060#undef VSR
2061
2062void helper_vsro(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2063{
2064    int sh = (b->u8[LO_IDX * 0xf] >> 3) & 0xf;
2065
2066#if defined(HOST_WORDS_BIGENDIAN)
2067    memmove(&r->u8[sh], &a->u8[0], 16 - sh);
2068    memset(&r->u8[0], 0, sh);
2069#else
2070    memmove(&r->u8[0], &a->u8[sh], 16 - sh);
2071    memset(&r->u8[16 - sh], 0, sh);
2072#endif
2073}
2074
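    /* vsubcuw: store the carry (borrow-free) flag of each 32-bit
     * subtraction, i.e. 1 when a >= b and no borrow occurs, else 0.
     */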
2075void helper_vsubcuw(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2076{
2077    int i;
2078
2079    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2080        r->u32[i] = a->u32[i] >= b->u32[i];
2081    }
2082}
2083
2084void helper_vsumsws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2085{
2086    int64_t t;
2087    int i, upper;
2088    ppc_avr_t result;
2089    int sat = 0;
2090
2091#if defined(HOST_WORDS_BIGENDIAN)
2092    upper = ARRAY_SIZE(r->s32)-1;
2093#else
2094    upper = 0;
2095#endif
2096    t = (int64_t)b->s32[upper];
2097    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2098        t += a->s32[i];
2099        result.s32[i] = 0;
2100    }
2101    result.s32[upper] = cvtsdsw(t, &sat);
2102    *r = result;
2103
2104    if (sat) {
2105        env->vscr |= (1 << VSCR_SAT);
2106    }
2107}
2108
2109void helper_vsum2sws(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2110{
2111    int i, j, upper;
2112    ppc_avr_t result;
2113    int sat = 0;
2114
2115#if defined(HOST_WORDS_BIGENDIAN)
2116    upper = 1;
2117#else
2118    upper = 0;
2119#endif
2120    for (i = 0; i < ARRAY_SIZE(r->u64); i++) {
2121        int64_t t = (int64_t)b->s32[upper + i * 2];
2122
2123        result.u64[i] = 0;
2124        for (j = 0; j < ARRAY_SIZE(r->u64); j++) {
2125            t += a->s32[2 * i + j];
2126        }
2127        result.s32[upper + i * 2] = cvtsdsw(t, &sat);
2128    }
2129
2130    *r = result;
2131    if (sat) {
2132        env->vscr |= (1 << VSCR_SAT);
2133    }
2134}
2135
2136void helper_vsum4sbs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2137{
2138    int i, j;
2139    int sat = 0;
2140
2141    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2142        int64_t t = (int64_t)b->s32[i];
2143
2144        for (j = 0; j < ARRAY_SIZE(r->s32); j++) {
2145            t += a->s8[4 * i + j];
2146        }
2147        r->s32[i] = cvtsdsw(t, &sat);
2148    }
2149
2150    if (sat) {
2151        env->vscr |= (1 << VSCR_SAT);
2152    }
2153}
2154
2155void helper_vsum4shs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2156{
2157    int sat = 0;
2158    int i;
2159
2160    for (i = 0; i < ARRAY_SIZE(r->s32); i++) {
2161        int64_t t = (int64_t)b->s32[i];
2162
2163        t += a->s16[2 * i] + a->s16[2 * i + 1];
2164        r->s32[i] = cvtsdsw(t, &sat);
2165    }
2166
2167    if (sat) {
2168        env->vscr |= (1 << VSCR_SAT);
2169    }
2170}
2171
2172void helper_vsum4ubs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2173{
2174    int i, j;
2175    int sat = 0;
2176
2177    for (i = 0; i < ARRAY_SIZE(r->u32); i++) {
2178        uint64_t t = (uint64_t)b->u32[i];
2179
2180        for (j = 0; j < ARRAY_SIZE(r->u32); j++) {
2181            t += a->u8[4 * i + j];
2182        }
2183        r->u32[i] = cvtuduw(t, &sat);
2184    }
2185
2186    if (sat) {
2187        env->vscr |= (1 << VSCR_SAT);
2188    }
2189}
2190
2191#if defined(HOST_WORDS_BIGENDIAN)
2192#define UPKHI 1
2193#define UPKLO 0
2194#else
2195#define UPKHI 0
2196#define UPKLO 1
2197#endif
2198#define VUPKPX(suffix, hi)                                              \
2199    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
2200    {                                                                   \
2201        int i;                                                          \
2202        ppc_avr_t result;                                               \
2203                                                                        \
2204        for (i = 0; i < ARRAY_SIZE(r->u32); i++) {                      \
2205            uint16_t e = b->u16[hi ? i : i+4];                          \
2206            uint8_t alpha = (e >> 15) ? 0xff : 0;                       \
2207            uint8_t red = (e >> 10) & 0x1f;                             \
2208            uint8_t green = (e >> 5) & 0x1f;                            \
2209            uint8_t blue = e & 0x1f;                                    \
2210                                                                        \
2211            result.u32[i] = (alpha << 24) | (red << 16) | (green << 8) | blue; \
2212        }                                                               \
2213        *r = result;                                                    \
2214    }
2215VUPKPX(lpx, UPKLO)
2216VUPKPX(hpx, UPKHI)
2217#undef VUPKPX
2218
2219#define VUPK(suffix, unpacked, packee, hi)                              \
2220    void helper_vupk##suffix(ppc_avr_t *r, ppc_avr_t *b)                \
2221    {                                                                   \
2222        int i;                                                          \
2223        ppc_avr_t result;                                               \
2224                                                                        \
2225        if (hi) {                                                       \
2226            for (i = 0; i < ARRAY_SIZE(r->unpacked); i++) {             \
2227                result.unpacked[i] = b->packee[i];                      \
2228            }                                                           \
2229        } else {                                                        \
2230            for (i = ARRAY_SIZE(r->unpacked); i < ARRAY_SIZE(r->packee); \
2231                 i++) {                                                 \
2232                result.unpacked[i - ARRAY_SIZE(r->unpacked)] = b->packee[i]; \
2233            }                                                           \
2234        }                                                               \
2235        *r = result;                                                    \
2236    }
2237VUPK(hsb, s16, s8, UPKHI)
2238VUPK(hsh, s32, s16, UPKHI)
2239VUPK(hsw, s64, s32, UPKHI)
2240VUPK(lsb, s16, s8, UPKLO)
2241VUPK(lsh, s32, s16, UPKLO)
2242VUPK(lsw, s64, s32, UPKLO)
2243#undef VUPK
2244#undef UPKHI
2245#undef UPKLO
2246
2247#define VGENERIC_DO(name, element)                                      \
2248    void helper_v##name(ppc_avr_t *r, ppc_avr_t *b)                     \
2249    {                                                                   \
2250        int i;                                                          \
2251                                                                        \
2252        VECTOR_FOR_INORDER_I(i, element) {                              \
2253            r->element[i] = name(b->element[i]);                        \
2254        }                                                               \
2255    }
2256
2257#define clzb(v) ((v) ? clz32((uint32_t)(v) << 24) : 8)
2258#define clzh(v) ((v) ? clz32((uint32_t)(v) << 16) : 16)
2259#define clzw(v) clz32((v))
2260#define clzd(v) clz64((v))
2261
2262VGENERIC_DO(clzb, u8)
2263VGENERIC_DO(clzh, u16)
2264VGENERIC_DO(clzw, u32)
2265VGENERIC_DO(clzd, u64)
2266
2267#undef clzb
2268#undef clzh
2269#undef clzw
2270#undef clzd
2271
2272#define ctzb(v) ((v) ? ctz32(v) : 8)
2273#define ctzh(v) ((v) ? ctz32(v) : 16)
2274#define ctzw(v) ctz32((v))
2275#define ctzd(v) ctz64((v))
2276
2277VGENERIC_DO(ctzb, u8)
2278VGENERIC_DO(ctzh, u16)
2279VGENERIC_DO(ctzw, u32)
2280VGENERIC_DO(ctzd, u64)
2281
2282#undef ctzb
2283#undef ctzh
2284#undef ctzw
2285#undef ctzd
2286
2287#define popcntb(v) ctpop8(v)
2288#define popcnth(v) ctpop16(v)
2289#define popcntw(v) ctpop32(v)
2290#define popcntd(v) ctpop64(v)
2291
2292VGENERIC_DO(popcntb, u8)
2293VGENERIC_DO(popcnth, u16)
2294VGENERIC_DO(popcntw, u32)
2295VGENERIC_DO(popcntd, u64)
2296
2297#undef popcntb
2298#undef popcnth
2299#undef popcntw
2300#undef popcntd
2301
2302#undef VGENERIC_DO
2303
2304#if defined(HOST_WORDS_BIGENDIAN)
2305#define QW_ONE { .u64 = { 0, 1 } }
2306#else
2307#define QW_ONE { .u64 = { 1, 0 } }
2308#endif
2309
2310#ifndef CONFIG_INT128
2311
2312static inline void avr_qw_not(ppc_avr_t *t, ppc_avr_t a)
2313{
2314    t->u64[0] = ~a.u64[0];
2315    t->u64[1] = ~a.u64[1];
2316}
2317
2318static int avr_qw_cmpu(ppc_avr_t a, ppc_avr_t b)
2319{
2320    if (a.u64[HI_IDX] < b.u64[HI_IDX]) {
2321        return -1;
2322    } else if (a.u64[HI_IDX] > b.u64[HI_IDX]) {
2323        return 1;
2324    } else if (a.u64[LO_IDX] < b.u64[LO_IDX]) {
2325        return -1;
2326    } else if (a.u64[LO_IDX] > b.u64[LO_IDX]) {
2327        return 1;
2328    } else {
2329        return 0;
2330    }
2331}
2332
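    /* 128-bit addition is composed from 64-bit halves; the carry out of
     * the low half is detected as (~a.lo < b.lo), since a.lo + b.lo
     * overflows exactly when b.lo exceeds the headroom above a.lo.
     */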
2333static void avr_qw_add(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2334{
2335    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2336    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2337                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2338}
2339
2340static int avr_qw_addc(ppc_avr_t *t, ppc_avr_t a, ppc_avr_t b)
2341{
2342    ppc_avr_t not_a;
2343    t->u64[LO_IDX] = a.u64[LO_IDX] + b.u64[LO_IDX];
2344    t->u64[HI_IDX] = a.u64[HI_IDX] + b.u64[HI_IDX] +
2345                     (~a.u64[LO_IDX] < b.u64[LO_IDX]);
2346    avr_qw_not(&not_a, a);
2347    return avr_qw_cmpu(not_a, b) < 0;
2348}
2349
2350#endif
2351
2352void helper_vadduqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2353{
2354#ifdef CONFIG_INT128
2355    r->u128 = a->u128 + b->u128;
2356#else
2357    avr_qw_add(r, *a, *b);
2358#endif
2359}
2360
2361void helper_vaddeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2362{
2363#ifdef CONFIG_INT128
2364    r->u128 = a->u128 + b->u128 + (c->u128 & 1);
2365#else
2366
2367    if (c->u64[LO_IDX] & 1) {
2368        ppc_avr_t tmp;
2369
2370        tmp.u64[HI_IDX] = 0;
2371        tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2372        avr_qw_add(&tmp, *a, tmp);
2373        avr_qw_add(r, tmp, *b);
2374    } else {
2375        avr_qw_add(r, *a, *b);
2376    }
2377#endif
2378}
2379
2380void helper_vaddcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2381{
2382#ifdef CONFIG_INT128
2383    r->u128 = (~a->u128 < b->u128);
2384#else
2385    ppc_avr_t not_a;
2386
2387    avr_qw_not(&not_a, *a);
2388
2389    r->u64[HI_IDX] = 0;
2390    r->u64[LO_IDX] = (avr_qw_cmpu(not_a, *b) < 0);
2391#endif
2392}
2393
2394void helper_vaddecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2395{
2396#ifdef CONFIG_INT128
2397    int carry_out = (~a->u128 < b->u128);
2398    if (!carry_out && (c->u128 & 1)) {
2399        carry_out = ((a->u128 + b->u128 + 1) == 0) &&
2400                    ((a->u128 != 0) || (b->u128 != 0));
2401    }
2402    r->u128 = carry_out;
2403#else
2404
2405    int carry_in = c->u64[LO_IDX] & 1;
2406    int carry_out = 0;
2407    ppc_avr_t tmp;
2408
2409    carry_out = avr_qw_addc(&tmp, *a, *b);
2410
2411    if (!carry_out && carry_in) {
2412        ppc_avr_t one = QW_ONE;
2413        carry_out = avr_qw_addc(&tmp, tmp, one);
2414    }
2415    r->u64[HI_IDX] = 0;
2416    r->u64[LO_IDX] = carry_out;
2417#endif
2418}
2419
2420void helper_vsubuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2421{
2422#ifdef CONFIG_INT128
2423    r->u128 = a->u128 - b->u128;
2424#else
2425    ppc_avr_t tmp;
2426    ppc_avr_t one = QW_ONE;
2427
2428    avr_qw_not(&tmp, *b);
2429    avr_qw_add(&tmp, *a, tmp);
2430    avr_qw_add(r, tmp, one);
2431#endif
2432}
2433
2434void helper_vsubeuqm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2435{
2436#ifdef CONFIG_INT128
2437    r->u128 = a->u128 + ~b->u128 + (c->u128 & 1);
2438#else
2439    ppc_avr_t tmp, sum;
2440
2441    avr_qw_not(&tmp, *b);
2442    avr_qw_add(&sum, *a, tmp);
2443
2444    tmp.u64[HI_IDX] = 0;
2445    tmp.u64[LO_IDX] = c->u64[LO_IDX] & 1;
2446    avr_qw_add(r, sum, tmp);
2447#endif
2448}
2449
2450void helper_vsubcuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2451{
2452#ifdef CONFIG_INT128
2453    r->u128 = (~a->u128 < ~b->u128) ||
2454                 (a->u128 + ~b->u128 == (__uint128_t)-1);
2455#else
2456    int carry = (avr_qw_cmpu(*a, *b) > 0);
2457    if (!carry) {
2458        ppc_avr_t tmp;
2459        avr_qw_not(&tmp, *b);
2460        avr_qw_add(&tmp, *a, tmp);
2461        carry = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2462    }
2463    r->u64[HI_IDX] = 0;
2464    r->u64[LO_IDX] = carry;
2465#endif
2466}
2467
2468void helper_vsubecuq(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
2469{
2470#ifdef CONFIG_INT128
2471    r->u128 =
2472        (~a->u128 < ~b->u128) ||
2473        ((c->u128 & 1) && (a->u128 + ~b->u128 == (__uint128_t)-1));
2474#else
2475    int carry_in = c->u64[LO_IDX] & 1;
2476    int carry_out = (avr_qw_cmpu(*a, *b) > 0);
2477    if (!carry_out && carry_in) {
2478        ppc_avr_t tmp;
2479        avr_qw_not(&tmp, *b);
2480        avr_qw_add(&tmp, *a, tmp);
2481        carry_out = ((tmp.u64[HI_IDX] == -1ull) && (tmp.u64[LO_IDX] == -1ull));
2482    }
2483
2484    r->u64[HI_IDX] = 0;
2485    r->u64[LO_IDX] = carry_out;
2486#endif
2487}
2488
2489#define BCD_PLUS_PREF_1 0xC
2490#define BCD_PLUS_PREF_2 0xF
2491#define BCD_PLUS_ALT_1  0xA
2492#define BCD_NEG_PREF    0xD
2493#define BCD_NEG_ALT     0xB
2494#define BCD_PLUS_ALT_2  0xE
2495#define NATIONAL_PLUS   0x2B
2496#define NATIONAL_NEG    0x2D
2497
2498#if defined(HOST_WORDS_BIGENDIAN)
2499#define BCD_DIG_BYTE(n) (15 - (n/2))
2500#else
2501#define BCD_DIG_BYTE(n) (n/2)
2502#endif
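    /* Signed-decimal values hold 31 BCD digits plus a sign code: nibble 0
     * (the least significant nibble) is the sign, and digit n occupies
     * nibble n for n = 1..31.  For example +123 is encoded, in register
     * byte order from the most significant end, as 00 00 ... 00 12 3C
     * (0xC being BCD_PLUS_PREF_1).
     */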
2503
2504static int bcd_get_sgn(ppc_avr_t *bcd)
2505{
2506    switch (bcd->u8[BCD_DIG_BYTE(0)] & 0xF) {
2507    case BCD_PLUS_PREF_1:
2508    case BCD_PLUS_PREF_2:
2509    case BCD_PLUS_ALT_1:
2510    case BCD_PLUS_ALT_2:
2511    {
2512        return 1;
2513    }
2514
2515    case BCD_NEG_PREF:
2516    case BCD_NEG_ALT:
2517    {
2518        return -1;
2519    }
2520
2521    default:
2522    {
2523        return 0;
2524    }
2525    }
2526}
2527
2528static int bcd_preferred_sgn(int sgn, int ps)
2529{
2530    if (sgn >= 0) {
2531        return (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2;
2532    } else {
2533        return BCD_NEG_PREF;
2534    }
2535}
2536
2537static uint8_t bcd_get_digit(ppc_avr_t *bcd, int n, int *invalid)
2538{
2539    uint8_t result;
2540    if (n & 1) {
2541        result = bcd->u8[BCD_DIG_BYTE(n)] >> 4;
2542    } else {
2543       result = bcd->u8[BCD_DIG_BYTE(n)] & 0xF;
2544    }
2545
2546    if (unlikely(result > 9)) {
2547        *invalid = true;
2548    }
2549    return result;
2550}
2551
2552static void bcd_put_digit(ppc_avr_t *bcd, uint8_t digit, int n)
2553{
2554    if (n & 1) {
2555        bcd->u8[BCD_DIG_BYTE(n)] &= 0x0F;
2556        bcd->u8[BCD_DIG_BYTE(n)] |= (digit<<4);
2557    } else {
2558        bcd->u8[BCD_DIG_BYTE(n)] &= 0xF0;
2559        bcd->u8[BCD_DIG_BYTE(n)] |= digit;
2560    }
2561}
2562
2563static int bcd_cmp_zero(ppc_avr_t *bcd)
2564{
2565    if (bcd->u64[HI_IDX] == 0 && (bcd->u64[LO_IDX] >> 4) == 0) {
2566        return 1 << CRF_EQ;
2567    } else {
2568        return (bcd_get_sgn(bcd) == 1) ? 1 << CRF_GT : 1 << CRF_LT;
2569    }
2570}
2571
2572static uint16_t get_national_digit(ppc_avr_t *reg, int n)
2573{
2574#if defined(HOST_WORDS_BIGENDIAN)
2575    return reg->u16[7 - n];
2576#else
2577    return reg->u16[n];
2578#endif
2579}
2580
2581static void set_national_digit(ppc_avr_t *reg, uint8_t val, int n)
2582{
2583#if defined(HOST_WORDS_BIGENDIAN)
2584    reg->u16[7 - n] = val;
2585#else
2586    reg->u16[n] = val;
2587#endif
2588}
2589
2590static int bcd_cmp_mag(ppc_avr_t *a, ppc_avr_t *b)
2591{
2592    int i;
2593    int invalid = 0;
2594    for (i = 31; i > 0; i--) {
2595        uint8_t dig_a = bcd_get_digit(a, i, &invalid);
2596        uint8_t dig_b = bcd_get_digit(b, i, &invalid);
2597        if (unlikely(invalid)) {
2598            return 0; /* doesn't matter */
2599        } else if (dig_a > dig_b) {
2600            return 1;
2601        } else if (dig_a < dig_b) {
2602            return -1;
2603        }
2604    }
2605
2606    return 0;
2607}
2608
2609static int bcd_add_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2610                       int *overflow)
2611{
2612    int carry = 0;
2613    int i;
2614    int is_zero = 1;
2615    for (i = 1; i <= 31; i++) {
2616        uint8_t digit = bcd_get_digit(a, i, invalid) +
2617                        bcd_get_digit(b, i, invalid) + carry;
2618        is_zero &= (digit == 0);
2619        if (digit > 9) {
2620            carry = 1;
2621            digit -= 10;
2622        } else {
2623            carry = 0;
2624        }
2625
2626        bcd_put_digit(t, digit, i);
2627
2628        if (unlikely(*invalid)) {
2629            return -1;
2630        }
2631    }
2632
2633    *overflow = carry;
2634    return is_zero;
2635}
2636
2637static int bcd_sub_mag(ppc_avr_t *t, ppc_avr_t *a, ppc_avr_t *b, int *invalid,
2638                       int *overflow)
2639{
2640    int carry = 0;
2641    int i;
2642    int is_zero = 1;
2643    for (i = 1; i <= 31; i++) {
2644        uint8_t digit = bcd_get_digit(a, i, invalid) -
2645                        bcd_get_digit(b, i, invalid) + carry;
2646        is_zero &= (digit == 0);
2647        if (digit & 0x80) {
2648            carry = -1;
2649            digit += 10;
2650        } else {
2651            carry = 0;
2652        }
2653
2654        bcd_put_digit(t, digit, i);
2655
2656        if (unlikely(*invalid)) {
2657            return -1;
2658        }
2659    }
2660
2661    *overflow = carry;
2662    return is_zero;
2663}
2664
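    /* bcdadd returns a CR field: LT/GT for a negative/positive result,
     * EQ for zero, and SO for overflow or an invalid encoding.  For
     * example adding +1 to a value of 31 nines overflows: the carry out
     * of digit 31 has nowhere to go, so SO is raised.
     */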
2665uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2666{
2668    int sgna = bcd_get_sgn(a);
2669    int sgnb = bcd_get_sgn(b);
2670    int invalid = (sgna == 0) || (sgnb == 0);
2671    int overflow = 0;
2672    int zero = 0;
2673    uint32_t cr = 0;
2674    ppc_avr_t result = { .u64 = { 0, 0 } };
2675
2676    if (!invalid) {
2677        if (sgna == sgnb) {
2678            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2679            zero = bcd_add_mag(&result, a, b, &invalid, &overflow);
2680            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2681        } else if (bcd_cmp_mag(a, b) > 0) {
2682            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
2683            zero = bcd_sub_mag(&result, a, b, &invalid, &overflow);
2684            cr = (sgna > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2685        } else {
2686            result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
2687            zero = bcd_sub_mag(&result, b, a, &invalid, &overflow);
2688            cr = (sgnb > 0) ? 1 << CRF_GT : 1 << CRF_LT;
2689        }
2690    }
2691
2692    if (unlikely(invalid)) {
2693        result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
2694        cr = 1 << CRF_SO;
2695    } else if (overflow) {
2696        cr |= 1 << CRF_SO;
2697    } else if (zero) {
2698        cr = 1 << CRF_EQ;
2699    }
2700
2701    *r = result;
2702
2703    return cr;
2704}
2705
2706uint32_t helper_bcdsub(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
2707{
2708    ppc_avr_t bcopy = *b;
2709    int sgnb = bcd_get_sgn(b);
2710    if (sgnb < 0) {
2711        bcd_put_digit(&bcopy, BCD_PLUS_PREF_1, 0);
2712    } else if (sgnb > 0) {
2713        bcd_put_digit(&bcopy, BCD_NEG_PREF, 0);
2714    }
2715    /* else invalid ... defer to bcdadd code for proper handling */
2716
2717    return helper_bcdadd(r, a, &bcopy, ps);
2718}
2719
2720uint32_t helper_bcdcfn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2721{
2722    int i;
2723    int cr = 0;
2724    uint16_t national = 0;
2725    uint16_t sgnb = get_national_digit(b, 0);
2726    ppc_avr_t ret = { .u64 = { 0, 0 } };
2727    int invalid = (sgnb != NATIONAL_PLUS && sgnb != NATIONAL_NEG);
2728
2729    for (i = 1; i < 8; i++) {
2730        national = get_national_digit(b, i);
2731        if (unlikely(national < 0x30 || national > 0x39)) {
2732            invalid = 1;
2733            break;
2734        }
2735
2736        bcd_put_digit(&ret, national & 0xf, i);
2737    }
2738
2739    if (sgnb == NATIONAL_PLUS) {
2740        bcd_put_digit(&ret, (ps == 0) ? BCD_PLUS_PREF_1 : BCD_PLUS_PREF_2, 0);
2741    } else {
2742        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2743    }
2744
2745    cr = bcd_cmp_zero(&ret);
2746
2747    if (unlikely(invalid)) {
2748        cr = 1 << CRF_SO;
2749    }
2750
2751    *r = ret;
2752
2753    return cr;
2754}
2755
2756uint32_t helper_bcdctn(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2757{
2758    int i;
2759    int cr = 0;
2760    int sgnb = bcd_get_sgn(b);
2761    int invalid = (sgnb == 0);
2762    ppc_avr_t ret = { .u64 = { 0, 0 } };
2763
2764    int ox_flag = (b->u64[HI_IDX] != 0) || ((b->u64[LO_IDX] >> 32) != 0);
2765
2766    for (i = 1; i < 8; i++) {
2767        set_national_digit(&ret, 0x30 + bcd_get_digit(b, i, &invalid), i);
2768
2769        if (unlikely(invalid)) {
2770            break;
2771        }
2772    }
2773    set_national_digit(&ret, (sgnb == -1) ? NATIONAL_NEG : NATIONAL_PLUS, 0);
2774
2775    cr = bcd_cmp_zero(b);
2776
2777    if (ox_flag) {
2778        cr |= 1 << CRF_SO;
2779    }
2780
2781    if (unlikely(invalid)) {
2782        cr = 1 << CRF_SO;
2783    }
2784
2785    *r = ret;
2786
2787    return cr;
2788}
2789
2790uint32_t helper_bcdcfz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2791{
2792    int i;
2793    int cr = 0;
2794    int invalid = 0;
2795    int zone_digit = 0;
2796    int zone_lead = ps ? 0xF : 0x3;
2797    int digit = 0;
2798    ppc_avr_t ret = { .u64 = { 0, 0 } };
2799    int sgnb = b->u8[BCD_DIG_BYTE(0)] >> 4;
2800
2801    if (unlikely((sgnb < 0xA) && ps)) {
2802        invalid = 1;
2803    }
2804
2805    for (i = 0; i < 16; i++) {
2806        zone_digit = (i * 2) ? b->u8[BCD_DIG_BYTE(i * 2)] >> 4 : zone_lead;
2807        digit = b->u8[BCD_DIG_BYTE(i * 2)] & 0xF;
2808        if (unlikely(zone_digit != zone_lead || digit > 0x9)) {
2809            invalid = 1;
2810            break;
2811        }
2812
2813        bcd_put_digit(&ret, digit, i + 1);
2814    }
2815
2816    if ((ps && (sgnb == 0xB || sgnb == 0xD)) ||
2817            (!ps && (sgnb & 0x4))) {
2818        bcd_put_digit(&ret, BCD_NEG_PREF, 0);
2819    } else {
2820        bcd_put_digit(&ret, BCD_PLUS_PREF_1, 0);
2821    }
2822
2823    cr = bcd_cmp_zero(&ret);
2824
2825    if (unlikely(invalid)) {
2826        cr = 1 << CRF_SO;
2827    }
2828
2829    *r = ret;
2830
2831    return cr;
2832}
2833
2834uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
2835{
2836    int i;
2837    int cr = 0;
2838    uint8_t digit = 0;
2839    int sgnb = bcd_get_sgn(b);
2840    int zone_lead = (ps) ? 0xF0 : 0x30;
2841    int invalid = (sgnb == 0);
2842    ppc_avr_t ret = { .u64 = { 0, 0 } };
2843
2844    int ox_flag = ((b->u64[HI_IDX] >> 4) != 0);
2845
2846    for (i = 0; i < 16; i++) {
2847        digit = bcd_get_digit(b, i + 1, &invalid);
2848
2849        if (unlikely(invalid)) {
2850            break;
2851        }
2852
2853        ret.u8[BCD_DIG_BYTE(i * 2)] = zone_lead + digit;
2854    }
2855
2856    if (ps) {
2857        bcd_put_digit(&ret, (sgnb == 1) ? 0xC : 0xD, 1);
2858    } else {
2859        bcd_put_digit(&ret, (sgnb == 1) ? 0x3 : 0x7, 1);
2860    }
2861
2862    cr = bcd_cmp_zero(b);
2863
2864    if (ox_flag) {
2865        cr |= 1 << CRF_SO;
2866    }
2867
2868    if (unlikely(invalid)) {
2869        cr = 1 << CRF_SO;
2870    }
2871
2872    *r = ret;
2873
2874    return cr;
2875}
2876
2877void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
2878{
2879    int i;
2880    VECTOR_FOR_INORDER_I(i, u8) {
2881        r->u8[i] = AES_sbox[a->u8[i]];
2882    }
2883}
2884
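    /* vcipher is one full AES encryption round,
     * MixColumns(ShiftRows(SubBytes(state))) ^ round_key, computed with
     * the combined Te tables: each table maps a byte through the S-box
     * and multiplies it by one column of the MixColumns matrix, so four
     * lookups XORed together yield one output word.
     */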
2885void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2886{
2887    ppc_avr_t result;
2888    int i;
2889
2890    VECTOR_FOR_INORDER_I(i, u32) {
2891        result.AVRW(i) = b->AVRW(i) ^
2892            (AES_Te0[a->AVRB(AES_shifts[4*i + 0])] ^
2893             AES_Te1[a->AVRB(AES_shifts[4*i + 1])] ^
2894             AES_Te2[a->AVRB(AES_shifts[4*i + 2])] ^
2895             AES_Te3[a->AVRB(AES_shifts[4*i + 3])]);
2896    }
2897    *r = result;
2898}
2899
2900void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2901{
2902    ppc_avr_t result;
2903    int i;
2904
2905    VECTOR_FOR_INORDER_I(i, u8) {
2906        result.AVRB(i) = b->AVRB(i) ^ (AES_sbox[a->AVRB(AES_shifts[i])]);
2907    }
2908    *r = result;
2909}
2910
2911void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2912{
2913    /* This differs from what is written in ISA V2.07: the RTL there is
2914     * incorrect and will be fixed in ISA V2.07B. */
2915    int i;
2916    ppc_avr_t tmp;
2917
2918    VECTOR_FOR_INORDER_I(i, u8) {
2919        tmp.AVRB(i) = b->AVRB(i) ^ AES_isbox[a->AVRB(AES_ishifts[i])];
2920    }
2921
2922    VECTOR_FOR_INORDER_I(i, u32) {
2923        r->AVRW(i) =
2924            AES_imc[tmp.AVRB(4*i + 0)][0] ^
2925            AES_imc[tmp.AVRB(4*i + 1)][1] ^
2926            AES_imc[tmp.AVRB(4*i + 2)][2] ^
2927            AES_imc[tmp.AVRB(4*i + 3)][3];
2928    }
2929}
2930
2931void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
2932{
2933    ppc_avr_t result;
2934    int i;
2935
2936    VECTOR_FOR_INORDER_I(i, u8) {
2937        result.AVRB(i) = b->AVRB(i) ^ (AES_isbox[a->AVRB(AES_ishifts[i])]);
2938    }
2939    *r = result;
2940}
2941
2942#define ROTRu32(v, n) (((v) >> (n)) | ((v) << (32 - (n))))
2943#if defined(HOST_WORDS_BIGENDIAN)
2944#define EL_IDX(i) (i)
2945#else
2946#define EL_IDX(i) (3 - (i))
2947#endif
2948
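    /* Selects one of the four SHA-256 sigma functions per word: st = 0
     * picks the message-schedule functions and st = 1 the compression
     * functions, while each bit of 'six' chooses the lower or upper
     * variant for the corresponding element; e.g. sigma0(x) =
     * ROTR(x, 7) ^ ROTR(x, 18) ^ (x >> 3), as in FIPS 180-4.
     */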
2949void helper_vshasigmaw(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
2950{
2951    int st = (st_six & 0x10) != 0;
2952    int six = st_six & 0xF;
2953    int i;
2954
2955    VECTOR_FOR_INORDER_I(i, u32) {
2956        if (st == 0) {
2957            if ((six & (0x8 >> i)) == 0) {
2958                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 7) ^
2959                                    ROTRu32(a->u32[EL_IDX(i)], 18) ^
2960                                    (a->u32[EL_IDX(i)] >> 3);
2961            } else { /* six.bit[i] == 1 */
2962                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 17) ^
2963                                    ROTRu32(a->u32[EL_IDX(i)], 19) ^
2964                                    (a->u32[EL_IDX(i)] >> 10);
2965            }
2966        } else { /* st == 1 */
2967            if ((six & (0x8 >> i)) == 0) {
2968                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 2) ^
2969                                    ROTRu32(a->u32[EL_IDX(i)], 13) ^
2970                                    ROTRu32(a->u32[EL_IDX(i)], 22);
2971            } else { /* six.bit[i] == 1 */
2972                r->u32[EL_IDX(i)] = ROTRu32(a->u32[EL_IDX(i)], 6) ^
2973                                    ROTRu32(a->u32[EL_IDX(i)], 11) ^
2974                                    ROTRu32(a->u32[EL_IDX(i)], 25);
2975            }
2976        }
2977    }
2978}
2979
2980#undef ROTRu32
2981#undef EL_IDX
2982
2983#define ROTRu64(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
2984#if defined(HOST_WORDS_BIGENDIAN)
2985#define EL_IDX(i) (i)
2986#else
2987#define EL_IDX(i) (1 - (i))
2988#endif
2989
2990void helper_vshasigmad(ppc_avr_t *r,  ppc_avr_t *a, uint32_t st_six)
2991{
2992    int st = (st_six & 0x10) != 0;
2993    int six = st_six & 0xF;
2994    int i;
2995
2996    VECTOR_FOR_INORDER_I(i, u64) {
2997        if (st == 0) {
2998            if ((six & (0x8 >> (2*i))) == 0) {
2999                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 1) ^
3000                                    ROTRu64(a->u64[EL_IDX(i)], 8) ^
3001                                    (a->u64[EL_IDX(i)] >> 7);
3002            } else { /* six.bit[2*i] == 1 */
3003                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 19) ^
3004                                    ROTRu64(a->u64[EL_IDX(i)], 61) ^
3005                                    (a->u64[EL_IDX(i)] >> 6);
3006            }
3007        } else { /* st == 1 */
3008            if ((six & (0x8 >> (2*i))) == 0) {
3009                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 28) ^
3010                                    ROTRu64(a->u64[EL_IDX(i)], 34) ^
3011                                    ROTRu64(a->u64[EL_IDX(i)], 39);
3012            } else { /* six.bit[2*i] == 1 */
3013                r->u64[EL_IDX(i)] = ROTRu64(a->u64[EL_IDX(i)], 14) ^
3014                                    ROTRu64(a->u64[EL_IDX(i)], 18) ^
3015                                    ROTRu64(a->u64[EL_IDX(i)], 41);
3016            }
3017        }
3018    }
3019}
3020
3021#undef ROTRu64
3022#undef EL_IDX
3023
3024void helper_vpermxor(ppc_avr_t *r,  ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
3025{
3026    ppc_avr_t result;
3027    int i;
3028
3029    VECTOR_FOR_INORDER_I(i, u8) {
3030        int indexA = c->u8[i] >> 4;
3031        int indexB = c->u8[i] & 0xF;
3032#if defined(HOST_WORDS_BIGENDIAN)
3033        result.u8[i] = a->u8[indexA] ^ b->u8[indexB];
3034#else
3035        result.u8[i] = a->u8[15-indexA] ^ b->u8[15-indexB];
3036#endif
3037    }
3038    *r = result;
3039}
3040
3041#undef VECTOR_FOR_INORDER_I
3042#undef HI_IDX
3043#undef LO_IDX
3044
3045/*****************************************************************************/
3046/* SPE extension helpers */
3047/* Use a table to make this quicker */
3048static const uint8_t hbrev[16] = {
3049    0x0, 0x8, 0x4, 0xC, 0x2, 0xA, 0x6, 0xE,
3050    0x1, 0x9, 0x5, 0xD, 0x3, 0xB, 0x7, 0xF,
3051};
3052
3053static inline uint8_t byte_reverse(uint8_t val)
3054{
3055    return hbrev[val >> 4] | (hbrev[val & 0xF] << 4);
3056}
3057
3058static inline uint32_t word_reverse(uint32_t val)
3059{
3060    return byte_reverse(val >> 24) | (byte_reverse(val >> 16) << 8) |
3061        (byte_reverse(val >> 8) << 16) | (byte_reverse(val) << 24);
3062}
3063
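    /* brinc implements the bit-reversed increment used for FFT
     * addressing: the masked low bits of arg1 are bit-reversed,
     * incremented, and reversed back.  With a 3-bit mask, for instance,
     * successive results step through 0, 4, 2, 6, 1, 5, 3, 7.
     */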
3064#define MASKBITS 16 /* Random value - to be fixed (implementation dependent) */
3065target_ulong helper_brinc(target_ulong arg1, target_ulong arg2)
3066{
3067    uint32_t a, b, d, mask;
3068
3069    mask = UINT32_MAX >> (32 - MASKBITS);
3070    a = arg1 & mask;
3071    b = arg2 & mask;
3072    d = word_reverse(1 + word_reverse(a | ~b));
3073    return (arg1 & ~mask) | (d & b);
3074}
3075
3076uint32_t helper_cntlsw32(uint32_t val)
3077{
3078    if (val & 0x80000000) {
3079        return clz32(~val);
3080    } else {
3081        return clz32(val);
3082    }
3083}
3084
3085uint32_t helper_cntlzw32(uint32_t val)
3086{
3087    return clz32(val);
3088}
3089
3090/* 440 specific */
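    /* dlmzb scans the eight bytes of high:low from the left and returns
     * the count of bytes up to and including the leftmost zero byte, or
     * 8 if there is none (e.g. high = 0x61006263 gives 2); CR0, when
     * updated, records whether the zero byte was in 'high', in 'low',
     * or absent.
     */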
3091target_ulong helper_dlmzb(CPUPPCState *env, target_ulong high,
3092                          target_ulong low, uint32_t update_Rc)
3093{
3094    target_ulong mask;
3095    int i;
3096
3097    i = 1;
3098    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3099        if ((high & mask) == 0) {
3100            if (update_Rc) {
3101                env->crf[0] = 0x4;
3102            }
3103            goto done;
3104        }
3105        i++;
3106    }
3107    for (mask = 0xFF000000; mask != 0; mask = mask >> 8) {
3108        if ((low & mask) == 0) {
3109            if (update_Rc) {
3110                env->crf[0] = 0x8;
3111            }
3112            goto done;
3113        }
3114        i++;
3115    }
3116    i = 8;
3117    if (update_Rc) {
3118        env->crf[0] = 0x2;
3119    }
3120 done:
3121    env->xer = (env->xer & ~0x7F) | i;
3122    if (update_Rc) {
3123        env->crf[0] |= xer_so;
3124    }
3125    return i;
3126}
3127