qemu/target/arm/helper-a64.c
/*
 *  AArch64 specific helpers
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/gdbstub.h"
#include "exec/helper-proto.h"
#include "qemu/host-utils.h"
#include "qemu/log.h"
#include "sysemu/sysemu.h"
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "tcg.h"
#include "fpu/softfloat.h"
#include <zlib.h> /* For crc32 */

/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
{
    if (den == 0) {
        return 0;
    }
    return num / den;
}

int64_t HELPER(sdiv64)(int64_t num, int64_t den)
{
    if (den == 0) {
        return 0;
    }
    if (num == LLONG_MIN && den == -1) {
        return LLONG_MIN;
    }
    return num / den;
}

uint64_t HELPER(rbit64)(uint64_t x)
{
    return revbit64(x);
}

/* Convert a softfloat float_relation_* value (as returned by
 * the float*_compare functions) to the correct ARM
 * NZCV flag state.
 */
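/*
 * For reference, the resulting NZCV encodings below are: equal -> Z,C
 * (0110), less than -> N (1000), greater than -> C (0010), and
 * unordered -> C,V (0011), matching the FCMP flag behaviour described
 * in the ARM ARM.
 */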
static inline uint32_t float_rel_to_flags(int res)
{
    uint64_t flags;
    switch (res) {
    case float_relation_equal:
        flags = PSTATE_Z | PSTATE_C;
        break;
    case float_relation_less:
        flags = PSTATE_N;
        break;
    case float_relation_greater:
        flags = PSTATE_C;
        break;
    case float_relation_unordered:
    default:
        flags = PSTATE_C | PSTATE_V;
        break;
    }
    return flags;
}

uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare(x, y, fp_status));
}

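/*
 * The mulx helpers implement the FMULX special case: a multiply of
 * zero by infinity (in either operand order) returns 2.0 with the
 * sign of A XOR B, rather than the default NaN an ordinary multiply
 * would produce.
 */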
float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    if ((float32_is_zero(a) && float32_is_infinity(b)) ||
        (float32_is_infinity(a) && float32_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float32((1U << 30) |
                            ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
    }
    return float32_mul(a, b, fpst);
}

float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
        (float64_is_infinity(a) && float64_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float64((1ULL << 62) |
                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
    }
    return float64_mul(a, b, fpst);
}

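/*
 * Worked example for the lookup below: with rn = 31 and numregs = 2
 * the virtual table is the 32 bytes of V31 followed (wrapping) by V0.
 * An index byte of 33 is >= 16 * numregs, so that result byte is left
 * untouched: zero for TBL, the original element for TBX.
 */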
uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
                          uint32_t rn, uint32_t numregs)
{
    /* Helper function for SIMD TBL and TBX. We have to do the table
     * lookup part for the 64 bits worth of indices we're passed in.
     * result is the initial results vector (either zeroes for TBL
     * or some guest values for TBX), rn the register number where
     * the table starts, and numregs the number of registers in the table.
     * We return the results of the lookups.
     */
    int shift;

    for (shift = 0; shift < 64; shift += 8) {
        int index = extract64(indices, shift, 8);
        if (index < 16 * numregs) {
            /* Convert index (a byte offset into the virtual table
             * which is a series of 128-bit vectors concatenated)
             * into the correct register element plus a bit offset
             * into that element, bearing in mind that the table
             * can wrap around from V31 to V0.
             */
            int elt = (rn * 2 + (index >> 3)) % 64;
            int bitidx = (index & 7) * 8;
            uint64_t *q = aa64_vfp_qreg(env, elt >> 1);
            uint64_t val = extract64(q[elt & 1], bitidx, 8);

            result = deposit64(result, shift, 8, val);
        }
    }
    return result;
}

/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_eq_quiet(a, b, fpst);
}

uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_le(b, a, fpst);
}

uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_lt(b, a, fpst);
}

/* Reciprocal step and sqrt step. Note that unlike the A32/T32
 * versions, these do a fully fused multiply-add or
 * multiply-add-and-halve.
 */
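/*
 * Concretely, the helpers below compute FRECPS as 2 - a * b and
 * FRSQRTS as (3 - a * b) / 2, each as a single fused operation, with
 * the 0 * infinity special cases returning 2.0 and 1.5 respectively;
 * hence the constants defined here.
 */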
#define float16_two make_float16(0x4000)
#define float16_three make_float16(0x4200)
#define float16_one_point_five make_float16(0x3e00)

#define float32_two make_float32(0x40000000)
#define float32_three make_float32(0x40400000)
#define float32_one_point_five make_float32(0x3fc00000)

#define float64_two make_float64(0x4000000000000000ULL)
#define float64_three make_float64(0x4008000000000000ULL)
#define float64_one_point_five make_float64(0x3FF8000000000000ULL)

float16 HELPER(recpsf_f16)(float16 a, float16 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_two;
    }
    return float16_muladd(a, b, float16_two, 0, fpst);
}

float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_two;
    }
    return float32_muladd(a, b, float32_two, 0, fpst);
}

float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_two;
    }
    return float64_muladd(a, b, float64_two, 0, fpst);
}

float16 HELPER(rsqrtsf_f16)(float16 a, float16 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_one_point_five;
    }
    return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
}

float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_one_point_five;
    }
    return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
}

float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_one_point_five;
    }
    return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}

/* Pairwise long add: add pairs of adjacent elements into
 * double-width elements in the result (e.g. _s8 is an 8x8->16 op)
 */
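/*
 * The _s8 variant below sign-extends each byte into its 16-bit lane
 * with the bias trick (x ^ 0x80) - 0x80, applied to all lanes at once.
 * Bit 15 of every lane is forced to 1 before the subtraction so that a
 * borrow can never ripple into the next lane, and that bit is XORed
 * afterwards, which leaves the correct sign bit either way.
 */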
uint64_t HELPER(neon_addlp_s8)(uint64_t a)
{
    uint64_t nsignmask = 0x0080008000800080ULL;
    uint64_t wsignmask = 0x8000800080008000ULL;
    uint64_t elementmask = 0x00ff00ff00ff00ffULL;
    uint64_t tmp1, tmp2;
    uint64_t res, signres;

    /* Extract odd elements, sign extend each to a 16 bit field */
    tmp1 = a & elementmask;
    tmp1 ^= nsignmask;
    tmp1 |= wsignmask;
    tmp1 = (tmp1 - nsignmask) ^ wsignmask;
    /* Ditto for the even elements */
    tmp2 = (a >> 8) & elementmask;
    tmp2 ^= nsignmask;
    tmp2 |= wsignmask;
    tmp2 = (tmp2 - nsignmask) ^ wsignmask;

    /* calculate the result by summing bits 0..14, 16..30, etc,
     * and then adjusting the sign bits 15, 31, etc manually.
     * This ensures the addition can't overflow the 16 bit field.
     */
    signres = (tmp1 ^ tmp2) & wsignmask;
    res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
    res ^= signres;

    return res;
}

uint64_t HELPER(neon_addlp_u8)(uint64_t a)
{
    uint64_t tmp;

    tmp = a & 0x00ff00ff00ff00ffULL;
    tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
    return tmp;
}

uint64_t HELPER(neon_addlp_s16)(uint64_t a)
{
    int32_t reslo, reshi;

    reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
    reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);

    return (uint32_t)reslo | (((uint64_t)reshi) << 32);
}

uint64_t HELPER(neon_addlp_u16)(uint64_t a)
{
    uint64_t tmp;

    tmp = a & 0x0000ffff0000ffffULL;
    tmp += (a >> 16) & 0x0000ffff0000ffffULL;
    return tmp;
}

/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
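/*
 * For non-NaN inputs the helpers below keep the sign, clear the
 * fraction and replace the exponent with its bitwise complement (a
 * fixed near-maximum exponent is substituted when the input exponent
 * is zero), i.e. the result is a power of two roughly equal to the
 * reciprocal of the input.
 */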
float16 HELPER(frecpx_f16)(float16 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint16_t val16, sbit;
    int16_t exp;

    if (float16_is_any_nan(a)) {
        float16 nan = a;
        if (float16_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            nan = float16_maybe_silence_nan(a, fpst);
        }
        if (fpst->default_nan_mode) {
            nan = float16_default_nan(fpst);
        }
        return nan;
    }

    val16 = float16_val(a);
    sbit = 0x8000 & val16;
    exp = extract32(val16, 10, 5);

    if (exp == 0) {
        return make_float16(deposit32(sbit, 10, 5, 0x1e));
    } else {
        return make_float16(deposit32(sbit, 10, 5, ~exp));
    }
}

float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint32_t val32, sbit;
    int32_t exp;

    if (float32_is_any_nan(a)) {
        float32 nan = a;
        if (float32_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            nan = float32_maybe_silence_nan(a, fpst);
        }
        if (fpst->default_nan_mode) {
            nan = float32_default_nan(fpst);
        }
        return nan;
    }

    val32 = float32_val(a);
    sbit = 0x80000000ULL & val32;
    exp = extract32(val32, 23, 8);

    if (exp == 0) {
        return make_float32(sbit | (0xfe << 23));
    } else {
        return make_float32(sbit | (~exp & 0xff) << 23);
    }
}

float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint64_t val64, sbit;
    int64_t exp;

    if (float64_is_any_nan(a)) {
        float64 nan = a;
        if (float64_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            nan = float64_maybe_silence_nan(a, fpst);
        }
        if (fpst->default_nan_mode) {
            nan = float64_default_nan(fpst);
        }
        return nan;
    }

    val64 = float64_val(a);
    sbit = 0x8000000000000000ULL & val64;
    exp = extract64(float64_val(a), 52, 11);

    if (exp == 0) {
        return make_float64(sbit | (0x7feULL << 52));
    } else {
        return make_float64(sbit | (~exp & 0x7ffULL) << 52);
    }
}

float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
{
    /* Von Neumann rounding is implemented by using round-to-zero
     * and then setting the LSB of the result if Inexact was raised.
     */
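    /*
     * This "round to odd" behaviour is what FCVTXN requires; the usual
     * motivation is that a later narrowing of the float32 result (e.g.
     * to half precision) then cannot suffer a double-rounding error.
     */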
    float32 r;
    float_status *fpst = &env->vfp.fp_status;
    float_status tstat = *fpst;
    int exflags;

    set_float_rounding_mode(float_round_to_zero, &tstat);
    set_float_exception_flags(0, &tstat);
    r = float64_to_float32(a, &tstat);
    r = float32_maybe_silence_nan(r, &tstat);
    exflags = get_float_exception_flags(&tstat);
    if (exflags & float_flag_inexact) {
        r = make_float32(float32_val(r) | 1);
    }
    exflags |= get_float_exception_flags(fpst);
    set_float_exception_flags(exflags, fpst);
    return r;
}

/* 64-bit versions of the CRC helpers. Note that although the operation
 * (and the prototypes of crc32c() and crc32()) means that only the bottom
 * 32 bits of the accumulator and result are used, we pass and return
 * uint64_t for convenience of the generated code. Unlike the 32-bit
 * instruction set versions, val may genuinely have 64 bits of data in it.
 * The upper bytes of val (above the number specified by 'bytes') must have
 * been zeroed out by the caller.
 */
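/*
 * Note that crc32() below is zlib's standard CRC-32 (hence the
 * <zlib.h> include above), while crc32c() is the Castagnoli CRC-32C
 * from "qemu/crc32c.h", matching the CRC32* and CRC32C* instruction
 * variants respectively.
 */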
uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* zlib crc32 converts the accumulator and output to one's complement.  */
    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* Linux crc32c converts the output to one's complement.  */
    return crc32c(acc, buf, bytes) ^ 0xffffffff;
}

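/*
 * Compare the 128 bits at [addr] with the value captured by the
 * preceding load-exclusive (env->exclusive_val / env->exclusive_high)
 * and, if they match, store new_lo:new_hi in their place. This backs
 * the 64-bit paired store-exclusive (STXP-style) sequences; the
 * _parallel variants take the atomic cmpxchg path used when multiple
 * CPUs may be running in parallel.
 */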
/* Returns 0 on success; 1 otherwise.  */
static uint64_t do_paired_cmpxchg64_le(CPUARMState *env, uint64_t addr,
                                       uint64_t new_lo, uint64_t new_hi,
                                       bool parallel, uintptr_t ra)
{
    Int128 oldv, cmpv, newv;
    bool success;

    cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
    newv = int128_make128(new_lo, new_hi);

    if (parallel) {
#ifndef CONFIG_ATOMIC128
        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
#else
        int mem_idx = cpu_mmu_index(env, false);
        TCGMemOpIdx oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
        oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);
        success = int128_eq(oldv, cmpv);
#endif
    } else {
        uint64_t o0, o1;

#ifdef CONFIG_USER_ONLY
        /* ??? Enforce alignment.  */
        uint64_t *haddr = g2h(addr);

        helper_retaddr = ra;
        o0 = ldq_le_p(haddr + 0);
        o1 = ldq_le_p(haddr + 1);
        oldv = int128_make128(o0, o1);

        success = int128_eq(oldv, cmpv);
        if (success) {
            stq_le_p(haddr + 0, int128_getlo(newv));
            stq_le_p(haddr + 1, int128_gethi(newv));
        }
        helper_retaddr = 0;
#else
        int mem_idx = cpu_mmu_index(env, false);
        TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
        TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);

        o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
        o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
        oldv = int128_make128(o0, o1);

        success = int128_eq(oldv, cmpv);
        if (success) {
            helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
            helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
        }
#endif
    }

    return !success;
}

uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
                                              uint64_t new_lo, uint64_t new_hi)
{
    return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, false, GETPC());
}

uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
                                              uint64_t new_lo, uint64_t new_hi)
{
    return do_paired_cmpxchg64_le(env, addr, new_lo, new_hi, true, GETPC());
}

static uint64_t do_paired_cmpxchg64_be(CPUARMState *env, uint64_t addr,
                                       uint64_t new_lo, uint64_t new_hi,
                                       bool parallel, uintptr_t ra)
{
    Int128 oldv, cmpv, newv;
    bool success;

    /* high and low need to be switched here because this is not actually a
     * 128bit store but two doublewords stored consecutively
     */
    cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
    newv = int128_make128(new_hi, new_lo);

    if (parallel) {
#ifndef CONFIG_ATOMIC128
        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
#else
        int mem_idx = cpu_mmu_index(env, false);
        TCGMemOpIdx oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
        oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
        success = int128_eq(oldv, cmpv);
#endif
    } else {
        uint64_t o0, o1;

#ifdef CONFIG_USER_ONLY
        /* ??? Enforce alignment.  */
        uint64_t *haddr = g2h(addr);

        helper_retaddr = ra;
        o1 = ldq_be_p(haddr + 0);
        o0 = ldq_be_p(haddr + 1);
        oldv = int128_make128(o0, o1);

        success = int128_eq(oldv, cmpv);
        if (success) {
            stq_be_p(haddr + 0, int128_gethi(newv));
            stq_be_p(haddr + 1, int128_getlo(newv));
        }
        helper_retaddr = 0;
#else
        int mem_idx = cpu_mmu_index(env, false);
        TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
        TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);

        o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
        o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
        oldv = int128_make128(o0, o1);

        success = int128_eq(oldv, cmpv);
        if (success) {
            helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
            helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
        }
#endif
    }

    return !success;
}

uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
                                     uint64_t new_lo, uint64_t new_hi)
{
    return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, false, GETPC());
}

uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
                                     uint64_t new_lo, uint64_t new_hi)
{
    return do_paired_cmpxchg64_be(env, addr, new_lo, new_hi, true, GETPC());
}

/*
 * AdvSIMD half-precision
 */

#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))

#define ADVSIMD_HALFOP(name) \
float16 ADVSIMD_HELPER(name, h)(float16 a, float16 b, void *fpstp) \
{ \
    float_status *fpst = fpstp; \
    return float16_ ## name(a, b, fpst);    \
}

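/*
 * Each invocation below defines one helper; for example
 * ADVSIMD_HALFOP(add) expands to
 * float16 HELPER(advsimd_addh)(float16 a, float16 b, void *fpstp)
 * wrapping float16_add().
 */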
ADVSIMD_HALFOP(add)
ADVSIMD_HALFOP(sub)
ADVSIMD_HALFOP(mul)
ADVSIMD_HALFOP(div)
ADVSIMD_HALFOP(min)
ADVSIMD_HALFOP(max)
ADVSIMD_HALFOP(minnum)
ADVSIMD_HALFOP(maxnum)

#define ADVSIMD_TWOHALFOP(name)                                         \
uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
{ \
    float16  a1, a2, b1, b2;                        \
    uint32_t r1, r2;                                \
    float_status *fpst = fpstp;                     \
    a1 = extract32(two_a, 0, 16);                   \
    a2 = extract32(two_a, 16, 16);                  \
    b1 = extract32(two_b, 0, 16);                   \
    b2 = extract32(two_b, 16, 16);                  \
    r1 = float16_ ## name(a1, b1, fpst);            \
    r2 = float16_ ## name(a2, b2, fpst);            \
    return deposit32(r1, 16, 16, r2);               \
}

ADVSIMD_TWOHALFOP(add)
ADVSIMD_TWOHALFOP(sub)
ADVSIMD_TWOHALFOP(mul)
ADVSIMD_TWOHALFOP(div)
ADVSIMD_TWOHALFOP(min)
ADVSIMD_TWOHALFOP(max)
ADVSIMD_TWOHALFOP(minnum)
ADVSIMD_TWOHALFOP(maxnum)

/* Data processing - scalar floating-point and advanced SIMD */
static float16 float16_mulx(float16 a, float16 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    if ((float16_is_zero(a) && float16_is_infinity(b)) ||
        (float16_is_infinity(a) && float16_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float16((1U << 14) |
                            ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
    }
    return float16_mul(a, b, fpst);
}

ADVSIMD_HALFOP(mulx)
ADVSIMD_TWOHALFOP(mulx)

/* fused multiply-accumulate */
float16 HELPER(advsimd_muladdh)(float16 a, float16 b, float16 c, void *fpstp)
{
    float_status *fpst = fpstp;
    return float16_muladd(a, b, c, 0, fpst);
}

uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
                                  uint32_t two_c, void *fpstp)
{
    float_status *fpst = fpstp;
    float16  a1, a2, b1, b2, c1, c2;
    uint32_t r1, r2;
    a1 = extract32(two_a, 0, 16);
    a2 = extract32(two_a, 16, 16);
    b1 = extract32(two_b, 0, 16);
    b2 = extract32(two_b, 16, 16);
    c1 = extract32(two_c, 0, 16);
    c2 = extract32(two_c, 16, 16);
    r1 = float16_muladd(a1, b1, c1, 0, fpst);
    r2 = float16_muladd(a2, b2, c2, 0, fpst);
    return deposit32(r1, 16, 16, r2);
}

/*
 * Floating point comparisons produce an integer result. Softfloat
 * routines return float_relation types which we convert to the
 * all-zeroes/all-ones (0/-1) result that Neon requires.
 */

#define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0

uint32_t HELPER(advsimd_ceq_f16)(float16 a, float16 b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare_quiet(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cge_f16)(float16 a, float16 b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cgt_f16)(float16 a, float16 b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

uint32_t HELPER(advsimd_acge_f16)(float16 a, float16 b, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_acgt_f16)(float16 a, float16 b, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

/* round to integral */
float16 HELPER(advsimd_rinth_exact)(float16 x, void *fp_status)
{
    return float16_round_to_int(x, fp_status);
}

float16 HELPER(advsimd_rinth)(float16 x, void *fp_status)
{
    int old_flags = get_float_exception_flags(fp_status), new_flags;
    float16 ret;

    ret = float16_round_to_int(x, fp_status);

    /* Suppress any inexact exceptions the conversion produced */
    if (!(old_flags & float_flag_inexact)) {
        new_flags = get_float_exception_flags(fp_status);
        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
    }

    return ret;
}

/*
 * Half-precision floating point conversion functions
 *
 * There are a multitude of conversion functions with various
 * different rounding modes. This is dealt with by the calling code
 * setting the mode appropriately before calling the helper.
 */

uint32_t HELPER(advsimd_f16tosinth)(float16 a, void *fpstp)
{
    float_status *fpst = fpstp;

    /* Invalid if we are passed a NaN */
    if (float16_is_any_nan(a)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float16_to_int16(a, fpst);
}

uint32_t HELPER(advsimd_f16touinth)(float16 a, void *fpstp)
{
    float_status *fpst = fpstp;

    /* Invalid if we are passed a NaN */
    if (float16_is_any_nan(a)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float16_to_uint16(a, fpst);
}

/*
 * Square Root and Reciprocal square root
 */

float16 HELPER(sqrt_f16)(float16 a, void *fpstp)
{
    float_status *s = fpstp;

    return float16_sqrt(a, s);
}