qemu/target/arm/helper-a64.c
/*
 *  AArch64 specific helpers
 *
 *  Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "cpu.h"
#include "exec/gdbstub.h"
#include "exec/helper-proto.h"
#include "qemu/host-utils.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "tcg/tcg.h"
#include "fpu/softfloat.h"
#include <zlib.h> /* For crc32 */

/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the ARM architecture */
uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
{
    if (den == 0) {
        return 0;
    }
    return num / den;
}

int64_t HELPER(sdiv64)(int64_t num, int64_t den)
{
    if (den == 0) {
        return 0;
    }
    if (num == LLONG_MIN && den == -1) {
        return LLONG_MIN;
    }
    return num / den;
}

uint64_t HELPER(rbit64)(uint64_t x)
{
    return revbit64(x);
}

void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm)
{
    update_spsel(env, imm);
}

static void daif_check(CPUARMState *env, uint32_t op,
                       uint32_t imm, uintptr_t ra)
{
    /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set.  */
    if (arm_current_el(env) == 0 && !(arm_sctlr(env, 0) & SCTLR_UMA)) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, extract32(op, 0, 3),
                                               extract32(op, 3, 3), 4,
                                               imm, 0x1f, 0),
                           exception_target_el(env), ra);
    }
}
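
/*
 * (In the MSR DAIFSet/DAIFClr forms handled below, 'imm' is the
 * instruction's 4-bit CRm field, so shifting it left by 6 aligns it
 * with PSTATE.{D,A,I,F} at bits [9:6]. For example, imm == 0x3
 * touches only the I and F bits, i.e. it masks or unmasks IRQ and FIQ.)
 */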

void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1e, imm, GETPC());
    env->daif |= (imm << 6) & PSTATE_DAIF;
}

void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1f, imm, GETPC());
    env->daif &= ~((imm << 6) & PSTATE_DAIF);
}

/* Convert a softfloat float_relation_* value (as returned by
 * the float*_compare functions) to the correct ARM
 * NZCV flag state.
 */
static inline uint32_t float_rel_to_flags(int res)
{
    uint32_t flags;
    switch (res) {
    case float_relation_equal:
        flags = PSTATE_Z | PSTATE_C;
        break;
    case float_relation_less:
        flags = PSTATE_N;
        break;
    case float_relation_greater:
        flags = PSTATE_C;
        break;
    case float_relation_unordered:
    default:
        flags = PSTATE_C | PSTATE_V;
        break;
    }
    return flags;
}

uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status)
{
    return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status)
{
    return float_rel_to_flags(float16_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare(x, y, fp_status));
}

float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    if ((float32_is_zero(a) && float32_is_infinity(b)) ||
        (float32_is_infinity(a) && float32_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
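        /*
         * (Encoding note: in IEEE single precision, 0x40000000 is 2.0f,
         * i.e. sign 0, biased exponent 0x80, zero fraction; so (1U << 30)
         * below is 2.0, and bit 31 carries the XOR of the input signs.)
         */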
        return make_float32((1U << 30) |
                            ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
    }
    return float32_mul(a, b, fpst);
}

float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
        (float64_is_infinity(a) && float64_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float64((1ULL << 62) |
                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
    }
    return float64_mul(a, b, fpst);
}

/* 64-bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_eq_quiet(a, b, fpst);
}

uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_le(b, a, fpst);
}

uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_lt(b, a, fpst);
}

/* Reciprocal step and sqrt step. Note that unlike the A32/T32
 * versions, these do a fully fused multiply-add or
 * multiply-add-and-halve.
 */
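
/*
 * (Concretely, FRECPS computes 2 - a * b and FRSQRTS computes
 * (3 - a * b) / 2. The helpers below negate 'a' first so each is a
 * single fused muladd: fma(-a, b, 2), or fma(-a, b, 3) combined with
 * float_muladd_halve_result.)
 */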

uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_two;
    }
    return float16_muladd(a, b, float16_two, 0, fpst);
}

float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_two;
    }
    return float32_muladd(a, b, float32_two, 0, fpst);
}

float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_two;
    }
    return float64_muladd(a, b, float64_two, 0, fpst);
}

uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_one_point_five;
    }
    return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
}

float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_one_point_five;
    }
    return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
}

float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_one_point_five;
    }
    return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}

/* Pairwise long add: add pairs of adjacent elements into
 * double-width elements in the result (e.g. _s8 is an 8x8->16 op)
 */
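/*
 * (A worked example of the bias trick used below, for one byte lane:
 * sign extending an 8-bit x to 16 bits is ((x ^ 0x80) | 0x8000) - 0x80,
 * then ^ 0x8000. For x = 0xfe (-2): 0xfe ^ 0x80 = 0x7e; | 0x8000 =
 * 0x807e; - 0x80 = 0x7ffe; ^ 0x8000 = 0xfffe, which is -2 in 16 bits.
 * The | 0x8000 step keeps the subtraction from borrowing out of the
 * lane, so all four lanes can be processed in one 64-bit operation.)
 */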
uint64_t HELPER(neon_addlp_s8)(uint64_t a)
{
    uint64_t nsignmask = 0x0080008000800080ULL;
    uint64_t wsignmask = 0x8000800080008000ULL;
    uint64_t elementmask = 0x00ff00ff00ff00ffULL;
    uint64_t tmp1, tmp2;
    uint64_t res, signres;

    /* Extract odd elements, sign extend each to a 16 bit field */
    tmp1 = a & elementmask;
    tmp1 ^= nsignmask;
    tmp1 |= wsignmask;
    tmp1 = (tmp1 - nsignmask) ^ wsignmask;
    /* Ditto for the even elements */
    tmp2 = (a >> 8) & elementmask;
    tmp2 ^= nsignmask;
    tmp2 |= wsignmask;
    tmp2 = (tmp2 - nsignmask) ^ wsignmask;

    /* calculate the result by summing bits 0..14, 16..30, etc,
     * and then adjusting the sign bits 15, 31, etc manually.
     * This ensures the addition can't overflow the 16 bit field.
     */
    signres = (tmp1 ^ tmp2) & wsignmask;
    res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
    res ^= signres;

    return res;
}

uint64_t HELPER(neon_addlp_u8)(uint64_t a)
{
    uint64_t tmp;

    tmp = a & 0x00ff00ff00ff00ffULL;
    tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
    return tmp;
}

uint64_t HELPER(neon_addlp_s16)(uint64_t a)
{
    int32_t reslo, reshi;

    reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
    reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);

    return (uint32_t)reslo | (((uint64_t)reshi) << 32);
}

uint64_t HELPER(neon_addlp_u16)(uint64_t a)
{
    uint64_t tmp;

    tmp = a & 0x0000ffff0000ffffULL;
    tmp += (a >> 16) & 0x0000ffff0000ffffULL;
    return tmp;
}

/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
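/*
 * (For example, in half precision FRECPX(1.0) = 2.0: 1.0 is 0x3c00,
 * exp = 0x0f; the result keeps the sign, complements the exponent
 * field (~0x0f & 0x1f = 0x10) and zeroes the fraction, giving
 * 0x4000 = 2.0, an approximate reciprocal of the exponent only.)
 */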
uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint16_t val16, sbit;
    int16_t exp;

    if (float16_is_any_nan(a)) {
        float16 nan = a;
        if (float16_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float16_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float16_default_nan(fpst);
        }
        return nan;
    }

    a = float16_squash_input_denormal(a, fpst);

    val16 = float16_val(a);
    sbit = 0x8000 & val16;
    exp = extract32(val16, 10, 5);

    if (exp == 0) {
        return make_float16(deposit32(sbit, 10, 5, 0x1e));
    } else {
        return make_float16(deposit32(sbit, 10, 5, ~exp));
    }
}

float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint32_t val32, sbit;
    int32_t exp;

    if (float32_is_any_nan(a)) {
        float32 nan = a;
        if (float32_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float32_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float32_default_nan(fpst);
        }
        return nan;
    }

    a = float32_squash_input_denormal(a, fpst);

    val32 = float32_val(a);
    sbit = 0x80000000ULL & val32;
    exp = extract32(val32, 23, 8);

    if (exp == 0) {
        return make_float32(sbit | (0xfe << 23));
    } else {
        return make_float32(sbit | (~exp & 0xff) << 23);
    }
}

float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint64_t val64, sbit;
    int64_t exp;

    if (float64_is_any_nan(a)) {
        float64 nan = a;
        if (float64_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            if (!fpst->default_nan_mode) {
                nan = float64_silence_nan(a, fpst);
            }
        }
        if (fpst->default_nan_mode) {
            nan = float64_default_nan(fpst);
        }
        return nan;
    }

    a = float64_squash_input_denormal(a, fpst);

    val64 = float64_val(a);
    sbit = 0x8000000000000000ULL & val64;
    exp = extract64(float64_val(a), 52, 11);

    if (exp == 0) {
        return make_float64(sbit | (0x7feULL << 52));
    } else {
        return make_float64(sbit | (~exp & 0x7ffULL) << 52);
    }
}

float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
{
    /* Von Neumann rounding is implemented by using round-to-zero
     * and then setting the LSB of the result if Inexact was raised.
     */
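    /*
     * (E.g. if the double input lies strictly between two adjacent
     * single-precision values, truncation raises Inexact and the LSB
     * of the result is forced to 1: "round to odd", which avoids
     * double-rounding error if the result is later rounded again to
     * half precision.)
     */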
    float32 r;
    float_status *fpst = &env->vfp.fp_status;
    float_status tstat = *fpst;
    int exflags;

    set_float_rounding_mode(float_round_to_zero, &tstat);
    set_float_exception_flags(0, &tstat);
    r = float64_to_float32(a, &tstat);
    exflags = get_float_exception_flags(&tstat);
    if (exflags & float_flag_inexact) {
        r = make_float32(float32_val(r) | 1);
    }
    exflags |= get_float_exception_flags(fpst);
    set_float_exception_flags(exflags, fpst);
    return r;
}

/* 64-bit versions of the CRC helpers. Note that although the operation
 * (and the prototypes of crc32c() and crc32()) means that only the bottom
 * 32 bits of the accumulator and result are used, we pass and return
 * uint64_t for convenience of the generated code. Unlike the 32-bit
 * instruction set versions, val may genuinely have 64 bits of data in it.
 * The upper bytes of val (above the number specified by 'bytes') must have
 * been zeroed out by the caller.
 */
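/*
 * (Sketch of the instruction-level use, assuming the usual operand
 * mapping: CRC32X Wd, Wn, Xm reaches crc32_64 with acc = Wn, val = Xm
 * and bytes = 8, consuming Xm's bytes in little-endian order, which is
 * what the stq_le_p() below provides.)
 */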
uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* zlib crc32 converts the accumulator and output to one's complement.  */
    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* Linux crc32c converts the output to one's complement.  */
    return crc32c(acc, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
                                     uint64_t new_lo, uint64_t new_hi)
{
    Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
    Int128 newv = int128_make128(new_lo, new_hi);
    Int128 oldv;
    uintptr_t ra = GETPC();
    uint64_t o0, o1;
    bool success;

#ifdef CONFIG_USER_ONLY
    /* ??? Enforce alignment.  */
    uint64_t *haddr = g2h(env_cpu(env), addr);

    set_helper_retaddr(ra);
    o0 = ldq_le_p(haddr + 0);
    o1 = ldq_le_p(haddr + 1);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        stq_le_p(haddr + 0, int128_getlo(newv));
        stq_le_p(haddr + 1, int128_gethi(newv));
    }
    clear_helper_retaddr();
#else
    int mem_idx = cpu_mmu_index(env, false);
    TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
    TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);

    o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
    o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
        helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
    }
#endif

    return !success;
}

uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
                                              uint64_t new_lo, uint64_t new_hi)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    bool success;
    int mem_idx;
    TCGMemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);

    cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
    newv = int128_make128(new_lo, new_hi);
    oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);

    success = int128_eq(oldv, cmpv);
    return !success;
}

uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
                                     uint64_t new_lo, uint64_t new_hi)
{
    /*
     * High and low need to be switched here because this is not actually a
     * 128-bit store but two doublewords stored consecutively
     */
    Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
    Int128 newv = int128_make128(new_hi, new_lo);
    Int128 oldv;
    uintptr_t ra = GETPC();
    uint64_t o0, o1;
    bool success;

#ifdef CONFIG_USER_ONLY
    /* ??? Enforce alignment.  */
    uint64_t *haddr = g2h(env_cpu(env), addr);

    set_helper_retaddr(ra);
    o1 = ldq_be_p(haddr + 0);
    o0 = ldq_be_p(haddr + 1);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        stq_be_p(haddr + 0, int128_gethi(newv));
        stq_be_p(haddr + 1, int128_getlo(newv));
    }
    clear_helper_retaddr();
#else
    int mem_idx = cpu_mmu_index(env, false);
    TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
    TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);

    o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
    o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
        helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
    }
#endif

    return !success;
}

uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
                                              uint64_t new_lo, uint64_t new_hi)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    bool success;
    int mem_idx;
    TCGMemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);

    /*
     * High and low need to be switched here because this is not actually a
     * 128-bit store but two doublewords stored consecutively
     */
    cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
    newv = int128_make128(new_hi, new_lo);
    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);

    success = int128_eq(oldv, cmpv);
    return !success;
}

/* Writes back the old data into Rs.  */
void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                              uint64_t new_lo, uint64_t new_hi)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    int mem_idx;
    TCGMemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);

    cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
    newv = int128_make128(new_lo, new_hi);
    oldv = cpu_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);

    env->xregs[rs] = int128_getlo(oldv);
    env->xregs[rs + 1] = int128_gethi(oldv);
}

void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                              uint64_t new_hi, uint64_t new_lo)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    int mem_idx;
    TCGMemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);

    cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
    newv = int128_make128(new_lo, new_hi);
    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);

    env->xregs[rs + 1] = int128_getlo(oldv);
    env->xregs[rs] = int128_gethi(oldv);
}

/*
 * AdvSIMD half-precision
 */

#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))

#define ADVSIMD_HALFOP(name) \
uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \
{ \
    float_status *fpst = fpstp; \
    return float16_ ## name(a, b, fpst);    \
}
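
/*
 * (For illustration, ADVSIMD_HALFOP(add) expands, via ADVSIMD_HELPER
 * and the HELPER() glue macros, to roughly:
 *
 *   uint32_t helper_advsimd_addh(uint32_t a, uint32_t b, void *fpstp)
 *   {
 *       float_status *fpst = fpstp;
 *       return float16_add(a, b, fpst);
 *   }
 *
 * one scalar float16 operation per instantiation.)
 */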

ADVSIMD_HALFOP(add)
ADVSIMD_HALFOP(sub)
ADVSIMD_HALFOP(mul)
ADVSIMD_HALFOP(div)
ADVSIMD_HALFOP(min)
ADVSIMD_HALFOP(max)
ADVSIMD_HALFOP(minnum)
ADVSIMD_HALFOP(maxnum)

#define ADVSIMD_TWOHALFOP(name)                                         \
uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
{ \
    float16  a1, a2, b1, b2;                        \
    uint32_t r1, r2;                                \
    float_status *fpst = fpstp;                     \
    a1 = extract32(two_a, 0, 16);                   \
    a2 = extract32(two_a, 16, 16);                  \
    b1 = extract32(two_b, 0, 16);                   \
    b2 = extract32(two_b, 16, 16);                  \
    r1 = float16_ ## name(a1, b1, fpst);            \
    r2 = float16_ ## name(a2, b2, fpst);            \
    return deposit32(r1, 16, 16, r2);               \
}

ADVSIMD_TWOHALFOP(add)
ADVSIMD_TWOHALFOP(sub)
ADVSIMD_TWOHALFOP(mul)
ADVSIMD_TWOHALFOP(div)
ADVSIMD_TWOHALFOP(min)
ADVSIMD_TWOHALFOP(max)
ADVSIMD_TWOHALFOP(minnum)
ADVSIMD_TWOHALFOP(maxnum)

/* Data processing - scalar floating-point and advanced SIMD */
static float16 float16_mulx(float16 a, float16 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    if ((float16_is_zero(a) && float16_is_infinity(b)) ||
        (float16_is_infinity(a) && float16_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float16((1U << 14) |
                            ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
    }
    return float16_mul(a, b, fpst);
}

ADVSIMD_HALFOP(mulx)
ADVSIMD_TWOHALFOP(mulx)

/* fused multiply-accumulate */
uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
                                 void *fpstp)
{
    float_status *fpst = fpstp;
    return float16_muladd(a, b, c, 0, fpst);
}

uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
                                  uint32_t two_c, void *fpstp)
{
    float_status *fpst = fpstp;
    float16  a1, a2, b1, b2, c1, c2;
    uint32_t r1, r2;
    a1 = extract32(two_a, 0, 16);
    a2 = extract32(two_a, 16, 16);
    b1 = extract32(two_b, 0, 16);
    b2 = extract32(two_b, 16, 16);
    c1 = extract32(two_c, 0, 16);
    c2 = extract32(two_c, 16, 16);
    r1 = float16_muladd(a1, b1, c1, 0, fpst);
    r2 = float16_muladd(a2, b2, c2, 0, fpst);
    return deposit32(r1, 16, 16, r2);
}

/*
 * Floating point comparisons produce an integer result. Softfloat
 * routines return float_relation types which we convert to the 0/-1
 * (all-zeroes/all-ones) result that Neon requires.
 */

#define ADVSIMD_CMPRES(test) ((test) ? 0xffff : 0)

uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare_quiet(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

/* round to integral */
uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status)
{
    return float16_round_to_int(x, fp_status);
}

uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
{
    int old_flags = get_float_exception_flags(fp_status), new_flags;
    float16 ret;

    ret = float16_round_to_int(x, fp_status);

    /* Suppress any inexact exceptions the conversion produced */
    if (!(old_flags & float_flag_inexact)) {
        new_flags = get_float_exception_flags(fp_status);
        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
    }

    return ret;
}

/*
 * Half-precision floating point conversion functions
 *
 * There are a multitude of conversion functions with various
 * different rounding modes. This is dealt with by the calling code
 * setting the mode appropriately before calling the helper.
 */

uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;

    /* Invalid if we are passed a NaN */
    if (float16_is_any_nan(a)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float16_to_int16(a, fpst);
}

uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;

    /* Invalid if we are passed a NaN */
    if (float16_is_any_nan(a)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float16_to_uint16(a, fpst);
}

static int el_from_spsr(uint32_t spsr)
{
    /* Return the exception level that this SPSR is requesting a return to,
     * or -1 if it is invalid (an illegal return)
     */
    if (spsr & PSTATE_nRW) {
        switch (spsr & CPSR_M) {
        case ARM_CPU_MODE_USR:
            return 0;
        case ARM_CPU_MODE_HYP:
            return 2;
        case ARM_CPU_MODE_FIQ:
        case ARM_CPU_MODE_IRQ:
        case ARM_CPU_MODE_SVC:
        case ARM_CPU_MODE_ABT:
        case ARM_CPU_MODE_UND:
        case ARM_CPU_MODE_SYS:
            return 1;
        case ARM_CPU_MODE_MON:
            /* Returning to Mon from AArch64 is never possible,
             * so this is an illegal return.
             */
        default:
            return -1;
        }
    } else {
        if (extract32(spsr, 1, 1)) {
            /* Return with reserved M[1] bit set */
            return -1;
        }
        if (extract32(spsr, 0, 4) == 1) {
            /* return to EL0 with M[0] bit set */
            return -1;
        }
        return extract32(spsr, 2, 2);
    }
}
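
/*
 * (A decoded example: spsr = 0x3c5 has M[4] (PSTATE_nRW) clear, so it
 * requests an AArch64 return; M[1] is clear, M[3:0] is 0x5 (EL1h), and
 * bits [3:2] give the target EL, so el_from_spsr() returns 1.)
 */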

static void cpsr_write_from_spsr_elx(CPUARMState *env,
                                     uint32_t val)
{
    uint32_t mask;

    /* Save SPSR_ELx.SS into PSTATE. */
    env->pstate = (env->pstate & ~PSTATE_SS) | (val & PSTATE_SS);
    val &= ~PSTATE_SS;

    /* Move DIT to the correct location for CPSR */
    if (val & PSTATE_DIT) {
        val &= ~PSTATE_DIT;
        val |= CPSR_DIT;
    }

    mask = aarch32_cpsr_valid_mask(env->features,
                                   &env_archcpu(env)->isar);
    cpsr_write(env, val, mask, CPSRWriteRaw);
}

void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
{
    int cur_el = arm_current_el(env);
    unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
    uint32_t spsr = env->banked_spsr[spsr_idx];
    int new_el;
    bool return_to_aa64 = (spsr & PSTATE_nRW) == 0;

    aarch64_save_sp(env, cur_el);

    arm_clear_exclusive(env);

    /* We must squash the PSTATE.SS bit to zero unless both of the
     * following hold:
     *  1. debug exceptions are currently disabled
     *  2. singlestep will be active in the EL we return to
     * We check 1 here and 2 after we've done the pstate/cpsr write() to
     * transition to the EL we're going to.
     */
    if (arm_generate_debug_exceptions(env)) {
        spsr &= ~PSTATE_SS;
    }

    new_el = el_from_spsr(spsr);
    if (new_el == -1) {
        goto illegal_return;
    }
    if (new_el > cur_el || (new_el == 2 && !arm_is_el2_enabled(env))) {
        /* Disallow return to an EL which is unimplemented or higher
         * than the current one.
         */
        goto illegal_return;
    }

    if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) {
        /* Return to an EL which is configured for a different register width */
        goto illegal_return;
    }

    if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        goto illegal_return;
    }

    qemu_mutex_lock_iothread();
    arm_call_pre_el_change_hook(env_archcpu(env));
    qemu_mutex_unlock_iothread();

    if (!return_to_aa64) {
        env->aarch64 = 0;
        /* We do a raw CPSR write because aarch64_sync_64_to_32()
         * will sort the register banks out for us, and we've already
         * caught all the bad-mode cases in el_from_spsr().
         */
        cpsr_write_from_spsr_elx(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_sync_64_to_32(env);

        if (spsr & CPSR_T) {
            env->regs[15] = new_pc & ~0x1;
        } else {
            env->regs[15] = new_pc & ~0x3;
        }
        helper_rebuild_hflags_a32(env, new_el);
        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch32 EL%d PC 0x%" PRIx32 "\n",
                      cur_el, new_el, env->regs[15]);
    } else {
        int tbii;

        env->aarch64 = 1;
        spsr &= aarch64_pstate_valid_mask(&env_archcpu(env)->isar);
        pstate_write(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_restore_sp(env, new_el);
        helper_rebuild_hflags_a64(env, new_el);

        /*
         * Apply TBI to the exception return address.  We had to delay this
         * until after we selected the new EL, so that we could select the
         * correct TBI+TBID bits.  This is made easier by waiting until after
         * the hflags rebuild, since we can pull the composite TBII field
         * from there.
         */
        tbii = EX_TBFLAG_A64(env->hflags, TBII);
        if ((tbii >> extract64(new_pc, 55, 1)) & 1) {
            /* TBI is enabled. */
            int core_mmu_idx = cpu_mmu_index(env, false);
            if (regime_has_2_ranges(core_to_aa64_mmu_idx(core_mmu_idx))) {
                new_pc = sextract64(new_pc, 0, 56);
            } else {
                new_pc = extract64(new_pc, 0, 56);
            }
        }
        env->pc = new_pc;

        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch64 EL%d PC 0x%" PRIx64 "\n",
                      cur_el, new_el, env->pc);
    }

    /*
     * Note that cur_el can never be 0.  If new_el is 0, then
     * el0_a64 is return_to_aa64, else el0_a64 is ignored.
     */
    aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);

    qemu_mutex_lock_iothread();
    arm_call_el_change_hook(env_archcpu(env));
    qemu_mutex_unlock_iothread();

    return;

illegal_return:
    /* Illegal return events of various kinds have architecturally
     * mandated behaviour:
     * restore NZCV and DAIF from SPSR_ELx
     * set PSTATE.IL
     * restore PC from ELR_ELx
     * no change to exception level, execution state or stack pointer
     */
    env->pstate |= PSTATE_IL;
    env->pc = new_pc;
    spsr &= PSTATE_NZCV | PSTATE_DAIF;
    spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF);
    pstate_write(env, spsr);
    if (!arm_singlestep_active(env)) {
        env->pstate &= ~PSTATE_SS;
    }
    qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
                  "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
}

/*
 * Square Root and Reciprocal square root
 */

uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
{
    float_status *s = fpstp;

    return float16_sqrt(a, s);
}

void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
{
    /*
     * Implement DC ZVA, which zeroes a fixed-length block of memory.
     * Note that we do not implement the (architecturally mandated)
     * alignment fault for attempts to use this on Device memory
     * (which matches the usual QEMU behaviour of not implementing either
     * alignment faults or any memory attribute handling).
     */
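    /*
     * (For example, the Cortex-A57 model sets dcz_blocksize = 4, so
     * blocklen below is 4 << 4 = 64 bytes, and DC ZVA with
     * vaddr_in = 0x1013 zeroes the block 0x1000..0x103f.)
     */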
    int blocklen = 4 << env_archcpu(env)->dcz_blocksize;
    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
    int mmu_idx = cpu_mmu_index(env, false);
    void *mem;

    /*
     * Trapless lookup.  In addition to actual invalid page, may
     * return NULL for I/O, watchpoints, clean pages, etc.
     */
    mem = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);

#ifndef CONFIG_USER_ONLY
    if (unlikely(!mem)) {
        uintptr_t ra = GETPC();

        /*
         * Trap if accessing an invalid page.  DC_ZVA requires that we supply
         * the original pointer for an invalid page.  But watchpoints require
         * that we probe the actual space.  So do both.
         */
        (void) probe_write(env, vaddr_in, 1, mmu_idx, ra);
        mem = probe_write(env, vaddr, blocklen, mmu_idx, ra);

        if (unlikely(!mem)) {
            /*
             * The only remaining reason for mem == NULL is I/O.
             * Just do a series of byte writes as the architecture demands.
             */
            for (int i = 0; i < blocklen; i++) {
                cpu_stb_mmuidx_ra(env, vaddr + i, 0, mmu_idx, ra);
            }
            return;
        }
    }
#endif

    memset(mem, 0, blocklen);
}