qemu/target/s390x/tcg/vec_fpu_helper.c
<<
>>
Prefs
   1/*
   2 * QEMU TCG support -- s390x vector floating point instruction support
   3 *
   4 * Copyright (C) 2019 Red Hat Inc
   5 *
   6 * Authors:
   7 *   David Hildenbrand <david@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12#include "qemu/osdep.h"
  13#include "qemu-common.h"
  14#include "cpu.h"
  15#include "s390x-internal.h"
  16#include "vec.h"
  17#include "tcg_s390x.h"
  18#include "tcg/tcg-gvec-desc.h"
  19#include "exec/exec-all.h"
  20#include "exec/helper-proto.h"
  21#include "fpu/softfloat.h"
  22
  23#define VIC_INVALID         0x1
  24#define VIC_DIVBYZERO       0x2
  25#define VIC_OVERFLOW        0x3
  26#define VIC_UNDERFLOW       0x4
  27#define VIC_INEXACT         0x5
  28
  29/* returns the VEX. If the VEX is 0, there is no trap */
  30static uint8_t check_ieee_exc(CPUS390XState *env, uint8_t enr, bool XxC,
  31                              uint8_t *vec_exc)
  32{
  33    uint8_t vece_exc = 0, trap_exc;
  34    unsigned qemu_exc;
  35
  36    /* Retrieve and clear the softfloat exceptions */
  37    qemu_exc = env->fpu_status.float_exception_flags;
  38    if (qemu_exc == 0) {
  39        return 0;
  40    }
  41    env->fpu_status.float_exception_flags = 0;
  42
  43    vece_exc = s390_softfloat_exc_to_ieee(qemu_exc);
  44
  45    /* Add them to the vector-wide s390x exception bits */
  46    *vec_exc |= vece_exc;
  47
  48    /* Check for traps and construct the VXC */
  49    trap_exc = vece_exc & env->fpc >> 24;
  50    if (trap_exc) {
  51        if (trap_exc & S390_IEEE_MASK_INVALID) {
  52            return enr << 4 | VIC_INVALID;
  53        } else if (trap_exc & S390_IEEE_MASK_DIVBYZERO) {
  54            return enr << 4 | VIC_DIVBYZERO;
  55        } else if (trap_exc & S390_IEEE_MASK_OVERFLOW) {
  56            return enr << 4 | VIC_OVERFLOW;
  57        } else if (trap_exc & S390_IEEE_MASK_UNDERFLOW) {
  58            return enr << 4 | VIC_UNDERFLOW;
  59        } else if (!XxC) {
  60            g_assert(trap_exc & S390_IEEE_MASK_INEXACT);
  61            /* inexact has lowest priority on traps */
  62            return enr << 4 | VIC_INEXACT;
  63        }
  64    }
  65    return 0;
  66}
  67
  68static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
  69                            uintptr_t retaddr)
  70{
  71    if (vxc) {
  72        /* on traps, the fpc flags are not updated, instruction is suppressed */
  73        tcg_s390_vector_exception(env, vxc, retaddr);
  74    }
  75    if (vec_exc) {
  76        /* indicate exceptions for all elements combined */
  77        env->fpc |= vec_exc << 16;
  78    }
  79}
  80
  81static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr)
  82{
  83    return make_float32(s390_vec_read_element32(v, enr));
  84}
  85
  86static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr)
  87{
  88    return make_float64(s390_vec_read_element64(v, enr));
  89}
  90
  91static float128 s390_vec_read_float128(const S390Vector *v)
  92{
  93    return make_float128(s390_vec_read_element64(v, 0),
  94                         s390_vec_read_element64(v, 1));
  95}
  96
  97static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data)
  98{
  99    return s390_vec_write_element32(v, enr, data);
 100}
 101
 102static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data)
 103{
 104    return s390_vec_write_element64(v, enr, data);
 105}
 106
 107static void s390_vec_write_float128(S390Vector *v, float128 data)
 108{
 109    s390_vec_write_element64(v, 0, data.high);
 110    s390_vec_write_element64(v, 1, data.low);
 111}
 112
 113typedef float32 (*vop32_2_fn)(float32 a, float_status *s);
 114static void vop32_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
 115                    bool s, bool XxC, uint8_t erm, vop32_2_fn fn,
 116                    uintptr_t retaddr)
 117{
 118    uint8_t vxc, vec_exc = 0;
 119    S390Vector tmp = {};
 120    int i, old_mode;
 121
 122    old_mode = s390_swap_bfp_rounding_mode(env, erm);
 123    for (i = 0; i < 4; i++) {
 124        const float32 a = s390_vec_read_float32(v2, i);
 125
 126        s390_vec_write_float32(&tmp, i, fn(a, &env->fpu_status));
 127        vxc = check_ieee_exc(env, i, XxC, &vec_exc);
 128        if (s || vxc) {
 129            break;
 130        }
 131    }
 132    s390_restore_bfp_rounding_mode(env, old_mode);
 133    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 134    *v1 = tmp;
 135}
 136
 137typedef float64 (*vop64_2_fn)(float64 a, float_status *s);
 138static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
 139                    bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
 140                    uintptr_t retaddr)
 141{
 142    uint8_t vxc, vec_exc = 0;
 143    S390Vector tmp = {};
 144    int i, old_mode;
 145
 146    old_mode = s390_swap_bfp_rounding_mode(env, erm);
 147    for (i = 0; i < 2; i++) {
 148        const float64 a = s390_vec_read_float64(v2, i);
 149
 150        s390_vec_write_float64(&tmp, i, fn(a, &env->fpu_status));
 151        vxc = check_ieee_exc(env, i, XxC, &vec_exc);
 152        if (s || vxc) {
 153            break;
 154        }
 155    }
 156    s390_restore_bfp_rounding_mode(env, old_mode);
 157    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 158    *v1 = tmp;
 159}
 160
 161typedef float128 (*vop128_2_fn)(float128 a, float_status *s);
 162static void vop128_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
 163                    bool s, bool XxC, uint8_t erm, vop128_2_fn fn,
 164                    uintptr_t retaddr)
 165{
 166    const float128 a = s390_vec_read_float128(v2);
 167    uint8_t vxc, vec_exc = 0;
 168    S390Vector tmp = {};
 169    int old_mode;
 170
 171    old_mode = s390_swap_bfp_rounding_mode(env, erm);
 172    s390_vec_write_float128(&tmp, fn(a, &env->fpu_status));
 173    vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
 174    s390_restore_bfp_rounding_mode(env, old_mode);
 175    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 176    *v1 = tmp;
 177}
 178
 179static float64 vcdg64(float64 a, float_status *s)
 180{
 181    return int64_to_float64(a, s);
 182}
 183
 184static float64 vcdlg64(float64 a, float_status *s)
 185{
 186    return uint64_to_float64(a, s);
 187}
 188
 189static float64 vcgd64(float64 a, float_status *s)
 190{
 191    const float64 tmp = float64_to_int64(a, s);
 192
 193    return float64_is_any_nan(a) ? INT64_MIN : tmp;
 194}
 195
 196static float64 vclgd64(float64 a, float_status *s)
 197{
 198    const float64 tmp = float64_to_uint64(a, s);
 199
 200    return float64_is_any_nan(a) ? 0 : tmp;
 201}
 202
 203#define DEF_GVEC_VOP2_FN(NAME, FN, BITS)                                       \
 204void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, CPUS390XState *env,   \
 205                               uint32_t desc)                                  \
 206{                                                                              \
 207    const uint8_t erm = extract32(simd_data(desc), 4, 4);                      \
 208    const bool se = extract32(simd_data(desc), 3, 1);                          \
 209    const bool XxC = extract32(simd_data(desc), 2, 1);                         \
 210                                                                               \
 211    vop##BITS##_2(v1, v2, env, se, XxC, erm, FN, GETPC());                     \
 212}
 213
 214#define DEF_GVEC_VOP2_64(NAME)                                                 \
 215DEF_GVEC_VOP2_FN(NAME, NAME##64, 64)
 216
 217#define DEF_GVEC_VOP2(NAME, OP)                                                \
 218DEF_GVEC_VOP2_FN(NAME, float32_##OP, 32)                                       \
 219DEF_GVEC_VOP2_FN(NAME, float64_##OP, 64)                                       \
 220DEF_GVEC_VOP2_FN(NAME, float128_##OP, 128)
 221
 222DEF_GVEC_VOP2_64(vcdg)
 223DEF_GVEC_VOP2_64(vcdlg)
 224DEF_GVEC_VOP2_64(vcgd)
 225DEF_GVEC_VOP2_64(vclgd)
 226DEF_GVEC_VOP2(vfi, round_to_int)
 227DEF_GVEC_VOP2(vfsq, sqrt)
 228
 229typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s);
 230static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
 231                    CPUS390XState *env, bool s, vop32_3_fn fn,
 232                    uintptr_t retaddr)
 233{
 234    uint8_t vxc, vec_exc = 0;
 235    S390Vector tmp = {};
 236    int i;
 237
 238    for (i = 0; i < 4; i++) {
 239        const float32 a = s390_vec_read_float32(v2, i);
 240        const float32 b = s390_vec_read_float32(v3, i);
 241
 242        s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status));
 243        vxc = check_ieee_exc(env, i, false, &vec_exc);
 244        if (s || vxc) {
 245            break;
 246        }
 247    }
 248    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 249    *v1 = tmp;
 250}
 251
 252typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s);
 253static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
 254                    CPUS390XState *env, bool s, vop64_3_fn fn,
 255                    uintptr_t retaddr)
 256{
 257    uint8_t vxc, vec_exc = 0;
 258    S390Vector tmp = {};
 259    int i;
 260
 261    for (i = 0; i < 2; i++) {
 262        const float64 a = s390_vec_read_float64(v2, i);
 263        const float64 b = s390_vec_read_float64(v3, i);
 264
 265        s390_vec_write_float64(&tmp, i, fn(a, b, &env->fpu_status));
 266        vxc = check_ieee_exc(env, i, false, &vec_exc);
 267        if (s || vxc) {
 268            break;
 269        }
 270    }
 271    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 272    *v1 = tmp;
 273}
 274
 275typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s);
 276static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
 277                     CPUS390XState *env, bool s, vop128_3_fn fn,
 278                     uintptr_t retaddr)
 279{
 280    const float128 a = s390_vec_read_float128(v2);
 281    const float128 b = s390_vec_read_float128(v3);
 282    uint8_t vxc, vec_exc = 0;
 283    S390Vector tmp = {};
 284
 285    s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status));
 286    vxc = check_ieee_exc(env, 0, false, &vec_exc);
 287    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 288    *v1 = tmp;
 289}
 290
 291#define DEF_GVEC_VOP3_B(NAME, OP, BITS)                                        \
 292void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
 293                              CPUS390XState *env, uint32_t desc)               \
 294{                                                                              \
 295    const bool se = extract32(simd_data(desc), 3, 1);                          \
 296                                                                               \
 297    vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC());           \
 298}
 299
 300#define DEF_GVEC_VOP3(NAME, OP)                                                \
 301DEF_GVEC_VOP3_B(NAME, OP, 32)                                                  \
 302DEF_GVEC_VOP3_B(NAME, OP, 64)                                                  \
 303DEF_GVEC_VOP3_B(NAME, OP, 128)
 304
 305DEF_GVEC_VOP3(vfa, add)
 306DEF_GVEC_VOP3(vfs, sub)
 307DEF_GVEC_VOP3(vfd, div)
 308DEF_GVEC_VOP3(vfm, mul)
 309
 310static int wfc32(const S390Vector *v1, const S390Vector *v2,
 311                 CPUS390XState *env, bool signal, uintptr_t retaddr)
 312{
 313    /* only the zero-indexed elements are compared */
 314    const float32 a = s390_vec_read_float32(v1, 0);
 315    const float32 b = s390_vec_read_float32(v2, 0);
 316    uint8_t vxc, vec_exc = 0;
 317    int cmp;
 318
 319    if (signal) {
 320        cmp = float32_compare(a, b, &env->fpu_status);
 321    } else {
 322        cmp = float32_compare_quiet(a, b, &env->fpu_status);
 323    }
 324    vxc = check_ieee_exc(env, 0, false, &vec_exc);
 325    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 326
 327    return float_comp_to_cc(env, cmp);
 328}
 329
 330static int wfc64(const S390Vector *v1, const S390Vector *v2,
 331                 CPUS390XState *env, bool signal, uintptr_t retaddr)
 332{
 333    /* only the zero-indexed elements are compared */
 334    const float64 a = s390_vec_read_float64(v1, 0);
 335    const float64 b = s390_vec_read_float64(v2, 0);
 336    uint8_t vxc, vec_exc = 0;
 337    int cmp;
 338
 339    if (signal) {
 340        cmp = float64_compare(a, b, &env->fpu_status);
 341    } else {
 342        cmp = float64_compare_quiet(a, b, &env->fpu_status);
 343    }
 344    vxc = check_ieee_exc(env, 0, false, &vec_exc);
 345    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 346
 347    return float_comp_to_cc(env, cmp);
 348}
 349
 350static int wfc128(const S390Vector *v1, const S390Vector *v2,
 351                  CPUS390XState *env, bool signal, uintptr_t retaddr)
 352{
 353    /* only the zero-indexed elements are compared */
 354    const float128 a = s390_vec_read_float128(v1);
 355    const float128 b = s390_vec_read_float128(v2);
 356    uint8_t vxc, vec_exc = 0;
 357    int cmp;
 358
 359    if (signal) {
 360        cmp = float128_compare(a, b, &env->fpu_status);
 361    } else {
 362        cmp = float128_compare_quiet(a, b, &env->fpu_status);
 363    }
 364    vxc = check_ieee_exc(env, 0, false, &vec_exc);
 365    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 366
 367    return float_comp_to_cc(env, cmp);
 368}
 369
 370#define DEF_GVEC_WFC_B(NAME, SIGNAL, BITS)                                     \
 371void HELPER(gvec_##NAME##BITS)(const void *v1, const void *v2,                 \
 372                               CPUS390XState *env, uint32_t desc)              \
 373{                                                                              \
 374    env->cc_op = wfc##BITS(v1, v2, env, SIGNAL, GETPC());                      \
 375}
 376
 377#define DEF_GVEC_WFC(NAME, SIGNAL)                                             \
 378     DEF_GVEC_WFC_B(NAME, SIGNAL, 32)                                          \
 379     DEF_GVEC_WFC_B(NAME, SIGNAL, 64)                                          \
 380     DEF_GVEC_WFC_B(NAME, SIGNAL, 128)
 381
 382DEF_GVEC_WFC(wfc, false)
 383DEF_GVEC_WFC(wfk, true)
 384
 385typedef bool (*vfc32_fn)(float32 a, float32 b, float_status *status);
 386static int vfc32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
 387                 CPUS390XState *env, bool s, vfc32_fn fn, uintptr_t retaddr)
 388{
 389    uint8_t vxc, vec_exc = 0;
 390    S390Vector tmp = {};
 391    int match = 0;
 392    int i;
 393
 394    for (i = 0; i < 4; i++) {
 395        const float32 a = s390_vec_read_float32(v2, i);
 396        const float32 b = s390_vec_read_float32(v3, i);
 397
 398        /* swap the order of the parameters, so we can use existing functions */
 399        if (fn(b, a, &env->fpu_status)) {
 400            match++;
 401            s390_vec_write_element32(&tmp, i, -1u);
 402        }
 403        vxc = check_ieee_exc(env, i, false, &vec_exc);
 404        if (s || vxc) {
 405            break;
 406        }
 407    }
 408
 409    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 410    *v1 = tmp;
 411    if (match) {
 412        return s || match == 4 ? 0 : 1;
 413    }
 414    return 3;
 415}
 416
 417typedef bool (*vfc64_fn)(float64 a, float64 b, float_status *status);
 418static int vfc64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
 419                 CPUS390XState *env, bool s, vfc64_fn fn, uintptr_t retaddr)
 420{
 421    uint8_t vxc, vec_exc = 0;
 422    S390Vector tmp = {};
 423    int match = 0;
 424    int i;
 425
 426    for (i = 0; i < 2; i++) {
 427        const float64 a = s390_vec_read_float64(v2, i);
 428        const float64 b = s390_vec_read_float64(v3, i);
 429
 430        /* swap the order of the parameters, so we can use existing functions */
 431        if (fn(b, a, &env->fpu_status)) {
 432            match++;
 433            s390_vec_write_element64(&tmp, i, -1ull);
 434        }
 435        vxc = check_ieee_exc(env, i, false, &vec_exc);
 436        if (s || vxc) {
 437            break;
 438        }
 439    }
 440
 441    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 442    *v1 = tmp;
 443    if (match) {
 444        return s || match == 2 ? 0 : 1;
 445    }
 446    return 3;
 447}
 448
 449typedef bool (*vfc128_fn)(float128 a, float128 b, float_status *status);
 450static int vfc128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
 451                 CPUS390XState *env, bool s, vfc128_fn fn, uintptr_t retaddr)
 452{
 453    const float128 a = s390_vec_read_float128(v2);
 454    const float128 b = s390_vec_read_float128(v3);
 455    uint8_t vxc, vec_exc = 0;
 456    S390Vector tmp = {};
 457    bool match = false;
 458
 459    /* swap the order of the parameters, so we can use existing functions */
 460    if (fn(b, a, &env->fpu_status)) {
 461        match = true;
 462        s390_vec_write_element64(&tmp, 0, -1ull);
 463        s390_vec_write_element64(&tmp, 1, -1ull);
 464    }
 465    vxc = check_ieee_exc(env, 0, false, &vec_exc);
 466    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 467    *v1 = tmp;
 468    return match ? 0 : 3;
 469}
 470
 471#define DEF_GVEC_VFC_B(NAME, OP, BITS)                                         \
 472void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
 473                               CPUS390XState *env, uint32_t desc)              \
 474{                                                                              \
 475    const bool se = extract32(simd_data(desc), 3, 1);                          \
 476    const bool sq = extract32(simd_data(desc), 2, 1);                          \
 477    vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet;  \
 478                                                                               \
 479    vfc##BITS(v1, v2, v3, env, se, fn, GETPC());                               \
 480}                                                                              \
 481                                                                               \
 482void HELPER(gvec_##NAME##BITS##_cc)(void *v1, const void *v2, const void *v3,  \
 483                                    CPUS390XState *env, uint32_t desc)         \
 484{                                                                              \
 485    const bool se = extract32(simd_data(desc), 3, 1);                          \
 486    const bool sq = extract32(simd_data(desc), 2, 1);                          \
 487    vfc##BITS##_fn fn = sq ? float##BITS##_##OP : float##BITS##_##OP##_quiet;  \
 488                                                                               \
 489    env->cc_op = vfc##BITS(v1, v2, v3, env, se, fn, GETPC());                  \
 490}
 491
 492#define DEF_GVEC_VFC(NAME, OP)                                                 \
 493DEF_GVEC_VFC_B(NAME, OP, 32)                                                   \
 494DEF_GVEC_VFC_B(NAME, OP, 64)                                                   \
 495DEF_GVEC_VFC_B(NAME, OP, 128)                                                  \
 496
 497DEF_GVEC_VFC(vfce, eq)
 498DEF_GVEC_VFC(vfch, lt)
 499DEF_GVEC_VFC(vfche, le)
 500
 501void HELPER(gvec_vfll32)(void *v1, const void *v2, CPUS390XState *env,
 502                         uint32_t desc)
 503{
 504    const bool s = extract32(simd_data(desc), 3, 1);
 505    uint8_t vxc, vec_exc = 0;
 506    S390Vector tmp = {};
 507    int i;
 508
 509    for (i = 0; i < 2; i++) {
 510        /* load from even element */
 511        const float32 a = s390_vec_read_element32(v2, i * 2);
 512        const uint64_t ret = float32_to_float64(a, &env->fpu_status);
 513
 514        s390_vec_write_element64(&tmp, i, ret);
 515        /* indicate the source element */
 516        vxc = check_ieee_exc(env, i * 2, false, &vec_exc);
 517        if (s || vxc) {
 518            break;
 519        }
 520    }
 521    handle_ieee_exc(env, vxc, vec_exc, GETPC());
 522    *(S390Vector *)v1 = tmp;
 523}
 524
 525void HELPER(gvec_vfll64)(void *v1, const void *v2, CPUS390XState *env,
 526                         uint32_t desc)
 527{
 528    /* load from even element */
 529    const float128 ret = float64_to_float128(s390_vec_read_float64(v2, 0),
 530                                             &env->fpu_status);
 531    uint8_t vxc, vec_exc = 0;
 532
 533    vxc = check_ieee_exc(env, 0, false, &vec_exc);
 534    handle_ieee_exc(env, vxc, vec_exc, GETPC());
 535    s390_vec_write_float128(v1, ret);
 536}
 537
 538void HELPER(gvec_vflr64)(void *v1, const void *v2, CPUS390XState *env,
 539                         uint32_t desc)
 540{
 541    const uint8_t erm = extract32(simd_data(desc), 4, 4);
 542    const bool s = extract32(simd_data(desc), 3, 1);
 543    const bool XxC = extract32(simd_data(desc), 2, 1);
 544    uint8_t vxc, vec_exc = 0;
 545    S390Vector tmp = {};
 546    int i, old_mode;
 547
 548    old_mode = s390_swap_bfp_rounding_mode(env, erm);
 549    for (i = 0; i < 2; i++) {
 550        float64 a = s390_vec_read_element64(v2, i);
 551        uint32_t ret = float64_to_float32(a, &env->fpu_status);
 552
 553        /* place at even element */
 554        s390_vec_write_element32(&tmp, i * 2, ret);
 555        /* indicate the source element */
 556        vxc = check_ieee_exc(env, i, XxC, &vec_exc);
 557        if (s || vxc) {
 558            break;
 559        }
 560    }
 561    s390_restore_bfp_rounding_mode(env, old_mode);
 562    handle_ieee_exc(env, vxc, vec_exc, GETPC());
 563    *(S390Vector *)v1 = tmp;
 564}
 565
 566void HELPER(gvec_vflr128)(void *v1, const void *v2, CPUS390XState *env,
 567                          uint32_t desc)
 568{
 569    const uint8_t erm = extract32(simd_data(desc), 4, 4);
 570    const bool XxC = extract32(simd_data(desc), 2, 1);
 571    uint8_t vxc, vec_exc = 0;
 572    int old_mode;
 573    float64 ret;
 574
 575    old_mode = s390_swap_bfp_rounding_mode(env, erm);
 576    ret = float128_to_float64(s390_vec_read_float128(v2), &env->fpu_status);
 577    vxc = check_ieee_exc(env, 0, XxC, &vec_exc);
 578    s390_restore_bfp_rounding_mode(env, old_mode);
 579    handle_ieee_exc(env, vxc, vec_exc, GETPC());
 580
 581    /* place at even element, odd element is unpredictable */
 582    s390_vec_write_float64(v1, 0, ret);
 583}
 584
 585static void vfma32(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
 586                   const S390Vector *v4, CPUS390XState *env, bool s, int flags,
 587                   uintptr_t retaddr)
 588{
 589    uint8_t vxc, vec_exc = 0;
 590    S390Vector tmp = {};
 591    int i;
 592
 593    for (i = 0; i < 4; i++) {
 594        const float32 a = s390_vec_read_float32(v2, i);
 595        const float32 b = s390_vec_read_float32(v3, i);
 596        const float32 c = s390_vec_read_float32(v4, i);
 597        float32 ret = float32_muladd(a, b, c, flags, &env->fpu_status);
 598
 599        s390_vec_write_float32(&tmp, i, ret);
 600        vxc = check_ieee_exc(env, i, false, &vec_exc);
 601        if (s || vxc) {
 602            break;
 603        }
 604    }
 605    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 606    *v1 = tmp;
 607}
 608
 609static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
 610                   const S390Vector *v4, CPUS390XState *env, bool s, int flags,
 611                   uintptr_t retaddr)
 612{
 613    uint8_t vxc, vec_exc = 0;
 614    S390Vector tmp = {};
 615    int i;
 616
 617    for (i = 0; i < 2; i++) {
 618        const float64 a = s390_vec_read_float64(v2, i);
 619        const float64 b = s390_vec_read_float64(v3, i);
 620        const float64 c = s390_vec_read_float64(v4, i);
 621        const float64 ret = float64_muladd(a, b, c, flags, &env->fpu_status);
 622
 623        s390_vec_write_float64(&tmp, i, ret);
 624        vxc = check_ieee_exc(env, i, false, &vec_exc);
 625        if (s || vxc) {
 626            break;
 627        }
 628    }
 629    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 630    *v1 = tmp;
 631}
 632
 633static void vfma128(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
 634                    const S390Vector *v4, CPUS390XState *env, bool s, int flags,
 635                    uintptr_t retaddr)
 636{
 637    const float128 a = s390_vec_read_float128(v2);
 638    const float128 b = s390_vec_read_float128(v3);
 639    const float128 c = s390_vec_read_float128(v4);
 640    uint8_t vxc, vec_exc = 0;
 641    float128 ret;
 642
 643    ret = float128_muladd(a, b, c, flags, &env->fpu_status);
 644    vxc = check_ieee_exc(env, 0, false, &vec_exc);
 645    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 646    s390_vec_write_float128(v1, ret);
 647}
 648
 649#define DEF_GVEC_VFMA_B(NAME, FLAGS, BITS)                                     \
 650void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
 651                               const void *v4, CPUS390XState *env,             \
 652                               uint32_t desc)                                  \
 653{                                                                              \
 654    const bool se = extract32(simd_data(desc), 3, 1);                          \
 655                                                                               \
 656    vfma##BITS(v1, v2, v3, v4, env, se, FLAGS, GETPC());                       \
 657}
 658
 659#define DEF_GVEC_VFMA(NAME, FLAGS)                                             \
 660    DEF_GVEC_VFMA_B(NAME, FLAGS, 32)                                           \
 661    DEF_GVEC_VFMA_B(NAME, FLAGS, 64)                                           \
 662    DEF_GVEC_VFMA_B(NAME, FLAGS, 128)
 663
 664DEF_GVEC_VFMA(vfma, 0)
 665DEF_GVEC_VFMA(vfms, float_muladd_negate_c)
 666DEF_GVEC_VFMA(vfnma, float_muladd_negate_result)
 667DEF_GVEC_VFMA(vfnms, float_muladd_negate_c | float_muladd_negate_result)
 668
 669void HELPER(gvec_vftci32)(void *v1, const void *v2, CPUS390XState *env,
 670                          uint32_t desc)
 671{
 672    uint16_t i3 = extract32(simd_data(desc), 4, 12);
 673    bool s = extract32(simd_data(desc), 3, 1);
 674    int i, match = 0;
 675
 676    for (i = 0; i < 4; i++) {
 677        float32 a = s390_vec_read_float32(v2, i);
 678
 679        if (float32_dcmask(env, a) & i3) {
 680            match++;
 681            s390_vec_write_element32(v1, i, -1u);
 682        } else {
 683            s390_vec_write_element32(v1, i, 0);
 684        }
 685        if (s) {
 686            break;
 687        }
 688    }
 689
 690    if (match == 4 || (s && match)) {
 691        env->cc_op = 0;
 692    } else if (match) {
 693        env->cc_op = 1;
 694    } else {
 695        env->cc_op = 3;
 696    }
 697}
 698
 699void HELPER(gvec_vftci64)(void *v1, const void *v2, CPUS390XState *env,
 700                          uint32_t desc)
 701{
 702    const uint16_t i3 = extract32(simd_data(desc), 4, 12);
 703    const bool s = extract32(simd_data(desc), 3, 1);
 704    int i, match = 0;
 705
 706    for (i = 0; i < 2; i++) {
 707        const float64 a = s390_vec_read_float64(v2, i);
 708
 709        if (float64_dcmask(env, a) & i3) {
 710            match++;
 711            s390_vec_write_element64(v1, i, -1ull);
 712        } else {
 713            s390_vec_write_element64(v1, i, 0);
 714        }
 715        if (s) {
 716            break;
 717        }
 718    }
 719
 720    if (match == 2 || (s && match)) {
 721        env->cc_op = 0;
 722    } else if (match) {
 723        env->cc_op = 1;
 724    } else {
 725        env->cc_op = 3;
 726    }
 727}
 728
 729void HELPER(gvec_vftci128)(void *v1, const void *v2, CPUS390XState *env,
 730                           uint32_t desc)
 731{
 732    const float128 a = s390_vec_read_float128(v2);
 733    uint16_t i3 = extract32(simd_data(desc), 4, 12);
 734
 735    if (float128_dcmask(env, a) & i3) {
 736        env->cc_op = 0;
 737        s390_vec_write_element64(v1, 0, -1ull);
 738        s390_vec_write_element64(v1, 1, -1ull);
 739    } else {
 740        env->cc_op = 3;
 741        s390_vec_write_element64(v1, 0, 0);
 742        s390_vec_write_element64(v1, 1, 0);
 743    }
 744}
 745
 746typedef enum S390MinMaxType {
 747    S390_MINMAX_TYPE_IEEE = 0,
 748    S390_MINMAX_TYPE_JAVA,
 749    S390_MINMAX_TYPE_C_MACRO,
 750    S390_MINMAX_TYPE_CPP,
 751    S390_MINMAX_TYPE_F,
 752} S390MinMaxType;
 753
 754typedef enum S390MinMaxRes {
 755    S390_MINMAX_RES_MINMAX = 0,
 756    S390_MINMAX_RES_A,
 757    S390_MINMAX_RES_B,
 758    S390_MINMAX_RES_SILENCE_A,
 759    S390_MINMAX_RES_SILENCE_B,
 760} S390MinMaxRes;
 761
 762static S390MinMaxRes vfmin_res(uint16_t dcmask_a, uint16_t dcmask_b,
 763                               S390MinMaxType type, float_status *s)
 764{
 765    const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
 766    const bool nan_a = dcmask_a & DCMASK_NAN;
 767    const bool nan_b = dcmask_b & DCMASK_NAN;
 768
 769    g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
 770
 771    if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
 772        const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
 773        const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
 774
 775        if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
 776            s->float_exception_flags |= float_flag_invalid;
 777        }
 778        switch (type) {
 779        case S390_MINMAX_TYPE_JAVA:
 780            if (sig_a) {
 781                return S390_MINMAX_RES_SILENCE_A;
 782            } else if (sig_b) {
 783                return S390_MINMAX_RES_SILENCE_B;
 784            }
 785            return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
 786        case S390_MINMAX_TYPE_F:
 787            return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
 788        case S390_MINMAX_TYPE_C_MACRO:
 789            s->float_exception_flags |= float_flag_invalid;
 790            return S390_MINMAX_RES_B;
 791        case S390_MINMAX_TYPE_CPP:
 792            s->float_exception_flags |= float_flag_invalid;
 793            return S390_MINMAX_RES_A;
 794        default:
 795            g_assert_not_reached();
 796        }
 797    } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) {
 798        switch (type) {
 799        case S390_MINMAX_TYPE_JAVA:
 800            return neg_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
 801        case S390_MINMAX_TYPE_C_MACRO:
 802            return S390_MINMAX_RES_B;
 803        case S390_MINMAX_TYPE_F:
 804            return !neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
 805        case S390_MINMAX_TYPE_CPP:
 806            return S390_MINMAX_RES_A;
 807        default:
 808            g_assert_not_reached();
 809        }
 810    }
 811    return S390_MINMAX_RES_MINMAX;
 812}
 813
 814static S390MinMaxRes vfmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
 815                               S390MinMaxType type, float_status *s)
 816{
 817    g_assert(type > S390_MINMAX_TYPE_IEEE && type <= S390_MINMAX_TYPE_F);
 818
 819    if (unlikely((dcmask_a | dcmask_b) & DCMASK_NAN)) {
 820        const bool sig_a = dcmask_a & DCMASK_SIGNALING_NAN;
 821        const bool sig_b = dcmask_b & DCMASK_SIGNALING_NAN;
 822        const bool nan_a = dcmask_a & DCMASK_NAN;
 823        const bool nan_b = dcmask_b & DCMASK_NAN;
 824
 825        if ((dcmask_a | dcmask_b) & DCMASK_SIGNALING_NAN) {
 826            s->float_exception_flags |= float_flag_invalid;
 827        }
 828        switch (type) {
 829        case S390_MINMAX_TYPE_JAVA:
 830            if (sig_a) {
 831                return S390_MINMAX_RES_SILENCE_A;
 832            } else if (sig_b) {
 833                return S390_MINMAX_RES_SILENCE_B;
 834            }
 835            return nan_a ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
 836        case S390_MINMAX_TYPE_F:
 837            return nan_b ? S390_MINMAX_RES_A : S390_MINMAX_RES_B;
 838        case S390_MINMAX_TYPE_C_MACRO:
 839            s->float_exception_flags |= float_flag_invalid;
 840            return S390_MINMAX_RES_B;
 841        case S390_MINMAX_TYPE_CPP:
 842            s->float_exception_flags |= float_flag_invalid;
 843            return S390_MINMAX_RES_A;
 844        default:
 845            g_assert_not_reached();
 846        }
 847    } else if (unlikely(dcmask_a & dcmask_b & DCMASK_ZERO)) {
 848        const bool neg_a = dcmask_a & DCMASK_NEGATIVE;
 849
 850        switch (type) {
 851        case S390_MINMAX_TYPE_JAVA:
 852        case S390_MINMAX_TYPE_F:
 853            return neg_a ? S390_MINMAX_RES_B : S390_MINMAX_RES_A;
 854        case S390_MINMAX_TYPE_C_MACRO:
 855            return S390_MINMAX_RES_B;
 856        case S390_MINMAX_TYPE_CPP:
 857            return S390_MINMAX_RES_A;
 858        default:
 859            g_assert_not_reached();
 860        }
 861    }
 862    return S390_MINMAX_RES_MINMAX;
 863}
 864
 865static S390MinMaxRes vfminmax_res(uint16_t dcmask_a, uint16_t dcmask_b,
 866                                  S390MinMaxType type, bool is_min,
 867                                  float_status *s)
 868{
 869    return is_min ? vfmin_res(dcmask_a, dcmask_b, type, s) :
 870                    vfmax_res(dcmask_a, dcmask_b, type, s);
 871}
 872
 873static void vfminmax32(S390Vector *v1, const S390Vector *v2,
 874                       const S390Vector *v3, CPUS390XState *env,
 875                       S390MinMaxType type, bool is_min, bool is_abs, bool se,
 876                       uintptr_t retaddr)
 877{
 878    float_status *s = &env->fpu_status;
 879    uint8_t vxc, vec_exc = 0;
 880    S390Vector tmp = {};
 881    int i;
 882
 883    for (i = 0; i < 4; i++) {
 884        float32 a = s390_vec_read_float32(v2, i);
 885        float32 b = s390_vec_read_float32(v3, i);
 886        float32 result;
 887
 888        if (type != S390_MINMAX_TYPE_IEEE) {
 889            S390MinMaxRes res;
 890
 891            if (is_abs) {
 892                a = float32_abs(a);
 893                b = float32_abs(b);
 894            }
 895
 896            res = vfminmax_res(float32_dcmask(env, a), float32_dcmask(env, b),
 897                               type, is_min, s);
 898            switch (res) {
 899            case S390_MINMAX_RES_MINMAX:
 900                result = is_min ? float32_min(a, b, s) : float32_max(a, b, s);
 901                break;
 902            case S390_MINMAX_RES_A:
 903                result = a;
 904                break;
 905            case S390_MINMAX_RES_B:
 906                result = b;
 907                break;
 908            case S390_MINMAX_RES_SILENCE_A:
 909                result = float32_silence_nan(a, s);
 910                break;
 911            case S390_MINMAX_RES_SILENCE_B:
 912                result = float32_silence_nan(b, s);
 913                break;
 914            default:
 915                g_assert_not_reached();
 916            }
 917        } else if (!is_abs) {
 918            result = is_min ? float32_minnum(a, b, &env->fpu_status) :
 919                              float32_maxnum(a, b, &env->fpu_status);
 920        } else {
 921            result = is_min ? float32_minnummag(a, b, &env->fpu_status) :
 922                              float32_maxnummag(a, b, &env->fpu_status);
 923        }
 924
 925        s390_vec_write_float32(&tmp, i, result);
 926        vxc = check_ieee_exc(env, i, false, &vec_exc);
 927        if (se || vxc) {
 928            break;
 929        }
 930    }
 931    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 932    *v1 = tmp;
 933}
 934
 935static void vfminmax64(S390Vector *v1, const S390Vector *v2,
 936                       const S390Vector *v3, CPUS390XState *env,
 937                       S390MinMaxType type, bool is_min, bool is_abs, bool se,
 938                       uintptr_t retaddr)
 939{
 940    float_status *s = &env->fpu_status;
 941    uint8_t vxc, vec_exc = 0;
 942    S390Vector tmp = {};
 943    int i;
 944
 945    for (i = 0; i < 2; i++) {
 946        float64 a = s390_vec_read_float64(v2, i);
 947        float64 b = s390_vec_read_float64(v3, i);
 948        float64 result;
 949
 950        if (type != S390_MINMAX_TYPE_IEEE) {
 951            S390MinMaxRes res;
 952
 953            if (is_abs) {
 954                a = float64_abs(a);
 955                b = float64_abs(b);
 956            }
 957
 958            res = vfminmax_res(float64_dcmask(env, a), float64_dcmask(env, b),
 959                               type, is_min, s);
 960            switch (res) {
 961            case S390_MINMAX_RES_MINMAX:
 962                result = is_min ? float64_min(a, b, s) : float64_max(a, b, s);
 963                break;
 964            case S390_MINMAX_RES_A:
 965                result = a;
 966                break;
 967            case S390_MINMAX_RES_B:
 968                result = b;
 969                break;
 970            case S390_MINMAX_RES_SILENCE_A:
 971                result = float64_silence_nan(a, s);
 972                break;
 973            case S390_MINMAX_RES_SILENCE_B:
 974                result = float64_silence_nan(b, s);
 975                break;
 976            default:
 977                g_assert_not_reached();
 978            }
 979        } else if (!is_abs) {
 980            result = is_min ? float64_minnum(a, b, &env->fpu_status) :
 981                              float64_maxnum(a, b, &env->fpu_status);
 982        } else {
 983            result = is_min ? float64_minnummag(a, b, &env->fpu_status) :
 984                              float64_maxnummag(a, b, &env->fpu_status);
 985        }
 986
 987        s390_vec_write_float64(&tmp, i, result);
 988        vxc = check_ieee_exc(env, i, false, &vec_exc);
 989        if (se || vxc) {
 990            break;
 991        }
 992    }
 993    handle_ieee_exc(env, vxc, vec_exc, retaddr);
 994    *v1 = tmp;
 995}
 996
 997static void vfminmax128(S390Vector *v1, const S390Vector *v2,
 998                        const S390Vector *v3, CPUS390XState *env,
 999                        S390MinMaxType type, bool is_min, bool is_abs, bool se,
1000                        uintptr_t retaddr)
1001{
1002    float128 a = s390_vec_read_float128(v2);
1003    float128 b = s390_vec_read_float128(v3);
1004    float_status *s = &env->fpu_status;
1005    uint8_t vxc, vec_exc = 0;
1006    float128 result;
1007
1008    if (type != S390_MINMAX_TYPE_IEEE) {
1009        S390MinMaxRes res;
1010
1011        if (is_abs) {
1012            a = float128_abs(a);
1013            b = float128_abs(b);
1014        }
1015
1016        res = vfminmax_res(float128_dcmask(env, a), float128_dcmask(env, b),
1017                           type, is_min, s);
1018        switch (res) {
1019        case S390_MINMAX_RES_MINMAX:
1020            result = is_min ? float128_min(a, b, s) : float128_max(a, b, s);
1021            break;
1022        case S390_MINMAX_RES_A:
1023            result = a;
1024            break;
1025        case S390_MINMAX_RES_B:
1026            result = b;
1027            break;
1028        case S390_MINMAX_RES_SILENCE_A:
1029            result = float128_silence_nan(a, s);
1030            break;
1031        case S390_MINMAX_RES_SILENCE_B:
1032            result = float128_silence_nan(b, s);
1033            break;
1034        default:
1035            g_assert_not_reached();
1036        }
1037    } else if (!is_abs) {
1038        result = is_min ? float128_minnum(a, b, &env->fpu_status) :
1039                          float128_maxnum(a, b, &env->fpu_status);
1040    } else {
1041        result = is_min ? float128_minnummag(a, b, &env->fpu_status) :
1042                          float128_maxnummag(a, b, &env->fpu_status);
1043    }
1044
1045    vxc = check_ieee_exc(env, 0, false, &vec_exc);
1046    handle_ieee_exc(env, vxc, vec_exc, retaddr);
1047    s390_vec_write_float128(v1, result);
1048}
1049
/*
 * Define the gvec helper HELPER(gvec_<NAME><BITS>), which forwards to
 * vfminmax<BITS>() with IS_MIN as the min/max selector.
 *
 * The simd data of @desc is decoded as follows: bit 3 is the single-element
 * flag, bits 4-7 hold a 4-bit type code. Codes of 8 and above select the
 * absolute-value variant of type (code - 8).
 */
#define DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, BITS)                                \
void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
                               CPUS390XState *env, uint32_t desc)              \
{                                                                              \
    const bool se = extract32(simd_data(desc), 3, 1);                          \
    const uint8_t code = extract32(simd_data(desc), 4, 4);                     \
    const bool is_abs = code >= 8;                                             \
    const uint8_t type = is_abs ? code - 8 : code;                             \
                                                                               \
    vfminmax##BITS(v1, v2, v3, env, type, IS_MIN, is_abs, se, GETPC());        \
}
1065
1066#define DEF_GVEC_VFMINMAX(NAME, IS_MIN)                                        \
1067    DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 32)                                      \
1068    DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 64)                                      \
1069    DEF_GVEC_VFMINMAX_B(NAME, IS_MIN, 128)
1070
1071DEF_GVEC_VFMINMAX(vfmax, false)
1072DEF_GVEC_VFMINMAX(vfmin, true)
1073