qemu/tests/tcg/hexagon/usr.c
<<
>>
Prefs
   1/*
   2 *  Copyright(c) 2022 Qualcomm Innovation Center, Inc. All Rights Reserved.
   3 *
   4 *  This program is free software; you can redistribute it and/or modify
   5 *  it under the terms of the GNU General Public License as published by
   6 *  the Free Software Foundation; either version 2 of the License, or
   7 *  (at your option) any later version.
   8 *
   9 *  This program is distributed in the hope that it will be useful,
  10 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12 *  GNU General Public License for more details.
  13 *
  14 *  You should have received a copy of the GNU General Public License
  15 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  16 */
  17
  18/*
  19 * Test instructions that might set bits in user status register (USR)
  20 */
  21
  22#include <stdio.h>
  23#include <stdint.h>
  24
  25int err;
  26
  27static void __check(int line, uint32_t val, uint32_t expect)
  28{
  29    if (val != expect) {
  30        printf("ERROR at line %d: %d != %d\n", line, val, expect);
  31        err++;
  32    }
  33}
  34
  35#define check(RES, EXP) __check(__LINE__, RES, EXP)
  36
  37static void __check32(int line, uint32_t val, uint32_t expect)
  38{
  39    if (val != expect) {
  40        printf("ERROR at line %d: 0x%08x != 0x%08x\n", line, val, expect);
  41        err++;
  42    }
  43}
  44
  45#define check32(RES, EXP) __check32(__LINE__, RES, EXP)
  46
  47static void __check64(int line, uint64_t val, uint64_t expect)
  48{
  49    if (val != expect) {
  50        printf("ERROR at line %d: 0x%016llx != 0x%016llx\n", line, val, expect);
  51        err++;
  52    }
  53}
  54
  55#define check64(RES, EXP) __check64(__LINE__, RES, EXP)
  56
  57/*
  58 * Some of the instructions tested are only available on certain versions
  59 * of the Hexagon core
  60 */
  61#define CORE_HAS_AUDIO    (__HEXAGON_ARCH__ >= 67 && defined(__HEXAGON_AUDIO__))
  62#define CORE_IS_V67       (__HEXAGON_ARCH__ >= 67)
  63
  64/* Define the bits in Hexagon USR register */
  65#define USR_OVF_BIT          0        /* Sticky saturation overflow */
  66#define USR_FPINVF_BIT       1        /* IEEE FP invalid sticky flag */
  67#define USR_FPDBZF_BIT       2        /* IEEE FP divide-by-zero sticky flag */
  68#define USR_FPOVFF_BIT       3        /* IEEE FP overflow sticky flag */
  69#define USR_FPUNFF_BIT       4        /* IEEE FP underflow sticky flag */
  70#define USR_FPINPF_BIT       5        /* IEEE FP inexact sticky flag */
  71
  72/* Corresponding values in USR */
  73#define USR_CLEAR            0
  74#define USR_OVF              (1 << USR_OVF_BIT)
  75#define USR_FPINVF           (1 << USR_FPINVF_BIT)
  76#define USR_FPDBZF           (1 << USR_FPDBZF_BIT)
  77#define USR_FPOVFF           (1 << USR_FPOVFF_BIT)
  78#define USR_FPUNFF           (1 << USR_FPUNFF_BIT)
  79#define USR_FPINPF           (1 << USR_FPINPF_BIT)
  80
  81/* Some useful floating point values */
  82const uint32_t SF_INF =              0x7f800000;
  83const uint32_t SF_QNaN =             0x7fc00000;
  84const uint32_t SF_SNaN =             0x7fb00000;
  85const uint32_t SF_QNaN_neg =         0xffc00000;
  86const uint32_t SF_SNaN_neg =         0xffb00000;
  87const uint32_t SF_HEX_NaN =          0xffffffff;
  88const uint32_t SF_zero =             0x00000000;
  89const uint32_t SF_zero_neg =         0x80000000;
  90const uint32_t SF_one =              0x3f800000;
  91const uint32_t SF_one_recip =        0x3f7f0001;         /* 0.9960...  */
  92const uint32_t SF_one_invsqrta =     0x3f7f0000;         /* 0.99609375 */
  93const uint32_t SF_two =              0x40000000;
  94const uint32_t SF_four =             0x40800000;
  95const uint32_t SF_small_neg =        0xab98fba8;
  96const uint32_t SF_large_pos =        0x5afa572e;
  97
  98const uint64_t DF_QNaN =             0x7ff8000000000000ULL;
  99const uint64_t DF_SNaN =             0x7ff7000000000000ULL;
 100const uint64_t DF_QNaN_neg =         0xfff8000000000000ULL;
 101const uint64_t DF_SNaN_neg =         0xfff7000000000000ULL;
 102const uint64_t DF_HEX_NaN =          0xffffffffffffffffULL;
 103const uint64_t DF_zero =             0x0000000000000000ULL;
 104const uint64_t DF_zero_neg =         0x8000000000000000ULL;
 105const uint64_t DF_any =              0x3f80000000000000ULL;
 106const uint64_t DF_one =              0x3ff0000000000000ULL;
 107const uint64_t DF_one_hh =           0x3ff001ff80000000ULL;     /* 1.00048... */
 108const uint64_t DF_small_neg =        0xbd731f7500000000ULL;
 109const uint64_t DF_large_pos =        0x7f80000000000001ULL;
 110
 111/*
 112 * Templates for functions to execute an instruction
 113 *
 114 * The templates vary by the number of arguments and the types of the args
 115 * and result.  We use one letter in the macro name for the result and each
 116 * argument:
 117 *     x             unknown (specified in a subsequent template) or don't care
 118 *     R             register (32 bits)
 119 *     P             pair (64 bits)
 120 *     p             predicate
 121 *     I             immediate
 122 *     Xx            read/write
 123 */
 124
 125/* Clear bits 0-5 in USR */
 126#define CLEAR_USRBITS \
 127    "r2 = usr\n\t" \
 128    "r2 = and(r2, #0xffffffc0)\n\t" \
 129    "usr = r2\n\t"
 130
 131/* Template for instructions with one register operand */
 132#define FUNC_x_OP_x(RESTYPE, SRCTYPE, NAME, INSN) \
 133static RESTYPE NAME(SRCTYPE src, uint32_t *usr_result) \
 134{ \
 135    RESTYPE result; \
 136    uint32_t usr; \
 137    asm(CLEAR_USRBITS \
 138        INSN  "\n\t" \
 139        "%1 = usr\n\t" \
 140        : "=r"(result), "=r"(usr) \
 141        : "r"(src) \
 142        : "r2", "usr"); \
 143      *usr_result = usr & 0x3f; \
 144      return result; \
 145}
 146
 147#define FUNC_R_OP_R(NAME, INSN) \
 148FUNC_x_OP_x(uint32_t, uint32_t, NAME, INSN)
 149
 150#define FUNC_R_OP_P(NAME, INSN) \
 151FUNC_x_OP_x(uint32_t, uint64_t, NAME, INSN)
 152
 153#define FUNC_P_OP_P(NAME, INSN) \
 154FUNC_x_OP_x(uint64_t, uint64_t, NAME, INSN)
 155
 156#define FUNC_P_OP_R(NAME, INSN) \
 157FUNC_x_OP_x(uint64_t, uint32_t, NAME, INSN)
 158
 159/*
 160 * Template for instructions with a register and predicate result
 161 * and one register operand
 162 */
 163#define FUNC_xp_OP_x(RESTYPE, SRCTYPE, NAME, INSN) \
 164static RESTYPE NAME(SRCTYPE src, uint8_t *pred_result, uint32_t *usr_result) \
 165{ \
 166    RESTYPE result; \
 167    uint8_t pred; \
 168    uint32_t usr; \
 169    asm(CLEAR_USRBITS \
 170        INSN  "\n\t" \
 171        "%1 = p2\n\t" \
 172        "%2 = usr\n\t" \
 173        : "=r"(result), "=r"(pred), "=r"(usr) \
 174        : "r"(src) \
 175        : "r2", "p2", "usr"); \
 176    *pred_result = pred; \
 177    *usr_result = usr & 0x3f; \
 178    return result; \
 179}
 180
 181#define FUNC_Rp_OP_R(NAME, INSN) \
 182FUNC_xp_OP_x(uint32_t, uint32_t, NAME, INSN)
 183
 184/* Template for instructions with two register operands */
 185#define FUNC_x_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \
 186static RESTYPE NAME(SRC1TYPE src1, SRC2TYPE src2, uint32_t *usr_result) \
 187{ \
 188    RESTYPE result; \
 189    uint32_t usr; \
 190    asm(CLEAR_USRBITS \
 191        INSN "\n\t" \
 192        "%1 = usr\n\t" \
 193        : "=r"(result), "=r"(usr) \
 194        : "r"(src1), "r"(src2) \
 195        : "r2", "usr"); \
 196    *usr_result = usr & 0x3f; \
 197    return result; \
 198}
 199
 200#define FUNC_P_OP_PP(NAME, INSN) \
 201FUNC_x_OP_xx(uint64_t, uint64_t, uint64_t, NAME, INSN)
 202
 203#define FUNC_R_OP_PP(NAME, INSN) \
 204FUNC_x_OP_xx(uint32_t, uint64_t, uint64_t, NAME, INSN)
 205
 206#define FUNC_P_OP_RR(NAME, INSN) \
 207FUNC_x_OP_xx(uint64_t, uint32_t, uint32_t, NAME, INSN)
 208
 209#define FUNC_R_OP_RR(NAME, INSN) \
 210FUNC_x_OP_xx(uint32_t, uint32_t, uint32_t, NAME, INSN)
 211
 212#define FUNC_R_OP_PR(NAME, INSN) \
 213FUNC_x_OP_xx(uint32_t, uint64_t, uint32_t, NAME, INSN)
 214
 215#define FUNC_P_OP_PR(NAME, INSN) \
 216FUNC_x_OP_xx(uint64_t, uint64_t, uint32_t, NAME, INSN)
 217
 218/*
 219 * Template for instructions with a register and predicate result
 220 * and two register operands
 221 */
 222#define FUNC_xp_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \
 223static RESTYPE NAME(SRC1TYPE src1, SRC2TYPE src2, \
 224                    uint8_t *pred_result, uint32_t *usr_result) \
 225{ \
 226    RESTYPE result; \
 227    uint8_t pred; \
 228    uint32_t usr; \
 229    asm(CLEAR_USRBITS \
 230        INSN  "\n\t" \
 231        "%1 = p2\n\t" \
 232        "%2 = usr\n\t" \
 233        : "=r"(result), "=r"(pred), "=r"(usr) \
 234        : "r"(src1), "r"(src2) \
 235        : "r2", "p2", "usr"); \
 236    *pred_result = pred; \
 237    *usr_result = usr & 0x3f; \
 238    return result; \
 239}
 240
 241#define FUNC_Rp_OP_RR(NAME, INSN) \
 242FUNC_xp_OP_xx(uint32_t, uint32_t, uint32_t, NAME, INSN)
 243
 244/* Template for instructions with one register and one immediate */
 245#define FUNC_x_OP_xI(RESTYPE, SRC1TYPE, NAME, INSN) \
 246static RESTYPE NAME(SRC1TYPE src1, int32_t src2, uint32_t *usr_result) \
 247{ \
 248    RESTYPE result; \
 249    uint32_t usr; \
 250    asm(CLEAR_USRBITS \
 251        INSN "\n\t" \
 252        "%1 = usr\n\t" \
 253        : "=r"(result), "=r"(usr) \
 254        : "r"(src1), "i"(src2) \
 255        : "r2", "usr"); \
 256    *usr_result = usr & 0x3f; \
 257    return result; \
 258}
 259
 260#define FUNC_R_OP_RI(NAME, INSN) \
 261FUNC_x_OP_xI(uint32_t, uint32_t, NAME, INSN)
 262
 263#define FUNC_R_OP_PI(NAME, INSN) \
 264FUNC_x_OP_xI(uint32_t, uint64_t, NAME, INSN)
 265
 266/*
 267 * Template for instructions with a read/write result
 268 * and two register operands
 269 */
 270#define FUNC_Xx_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \
 271static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2, \
 272                    uint32_t *usr_result) \
 273{ \
 274    uint32_t usr; \
 275    asm(CLEAR_USRBITS \
 276        INSN "\n\t" \
 277        "%1 = usr\n\t" \
 278        : "+r"(result), "=r"(usr) \
 279        : "r"(src1), "r"(src2) \
 280        : "r2", "usr"); \
 281    *usr_result = usr & 0x3f; \
 282    return result; \
 283}
 284
 285#define FUNC_XR_OP_RR(NAME, INSN) \
 286FUNC_Xx_OP_xx(uint32_t, uint32_t, uint32_t, NAME, INSN)
 287
 288#define FUNC_XP_OP_PP(NAME, INSN) \
 289FUNC_Xx_OP_xx(uint64_t, uint64_t, uint64_t, NAME, INSN)
 290
 291#define FUNC_XP_OP_RR(NAME, INSN) \
 292FUNC_Xx_OP_xx(uint64_t, uint32_t, uint32_t, NAME, INSN)
 293
 294/*
 295 * Template for instructions with a read/write result
 296 * and two register operands
 297 */
 298#define FUNC_Xxp_OP_xx(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \
 299static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2, \
 300                    uint8_t *pred_result, uint32_t *usr_result) \
 301{ \
 302    uint32_t usr; \
 303    uint8_t pred; \
 304    asm(CLEAR_USRBITS \
 305        INSN "\n\t" \
 306        "%1 = p2\n\t" \
 307        "%2 = usr\n\t" \
 308        : "+r"(result), "=r"(pred), "=r"(usr) \
 309        : "r"(src1), "r"(src2) \
 310        : "r2", "usr"); \
 311    *pred_result = pred; \
 312    *usr_result = usr & 0x3f; \
 313    return result; \
 314}
 315
 316#define FUNC_XPp_OP_PP(NAME, INSN) \
 317FUNC_Xxp_OP_xx(uint64_t, uint64_t, uint64_t, NAME, INSN)
 318
 319/*
 320 * Template for instructions with a read/write result and
 321 * two register and one predicate operands
 322 */
 323#define FUNC_Xx_OP_xxp(RESTYPE, SRC1TYPE, SRC2TYPE, NAME, INSN) \
 324static RESTYPE NAME(RESTYPE result, SRC1TYPE src1, SRC2TYPE src2, uint8_t pred,\
 325                    uint32_t *usr_result) \
 326{ \
 327    uint32_t usr; \
 328    asm(CLEAR_USRBITS \
 329        "p2 = %4\n\t" \
 330        INSN "\n\t" \
 331        "%1 = usr\n\t" \
 332        : "+r"(result), "=r"(usr) \
 333        : "r"(src1), "r"(src2), "r"(pred) \
 334        : "r2", "p2", "usr"); \
 335    *usr_result = usr & 0x3f; \
 336    return result; \
 337}
 338
 339#define FUNC_XR_OP_RRp(NAME, INSN) \
 340FUNC_Xx_OP_xxp(uint32_t, uint32_t, uint32_t, NAME, INSN)
 341
 342/* Template for compare instructions with two register operands */
 343#define FUNC_CMP_xx(SRC1TYPE, SRC2TYPE, NAME, INSN) \
 344static uint32_t NAME(SRC1TYPE src1, SRC2TYPE src2, uint32_t *usr_result) \
 345{ \
 346    uint32_t result; \
 347    uint32_t usr; \
 348    asm(CLEAR_USRBITS \
 349        INSN "\n\t" \
 350        "%0 = p1\n\t" \
 351        "%1 = usr\n\t" \
 352        : "=r"(result), "=r"(usr) \
 353        : "r"(src1), "r"(src2) \
 354        : "p1", "r2", "usr"); \
 355    *usr_result = usr & 0x3f; \
 356    return result; \
 357}
 358
 359#define FUNC_CMP_RR(NAME, INSN) \
 360FUNC_CMP_xx(uint32_t, uint32_t, NAME, INSN)
 361
 362#define FUNC_CMP_PP(NAME, INSN) \
 363FUNC_CMP_xx(uint64_t, uint64_t, NAME, INSN)
 364
/*
 * Function declarations using the templates
 *
 * Each line defines one static wrapper function around a single
 * instruction.  In the instruction strings %0 is always the result.
 * Source operands start at %2 for templates with two outputs (result and
 * USR) and at %3 for templates that also return a predicate.
 */
FUNC_R_OP_R(satub,              "%0 = satub(%2)")
FUNC_P_OP_PP(vaddubs,           "%0 = vaddub(%2, %3):sat")
FUNC_P_OP_PP(vadduhs,           "%0 = vadduh(%2, %3):sat")
FUNC_P_OP_PP(vsububs,           "%0 = vsubub(%2, %3):sat")
FUNC_P_OP_PP(vsubuhs,           "%0 = vsubuh(%2, %3):sat")

/* Add vector of half integers with saturation and pack to unsigned bytes */
FUNC_R_OP_PP(vaddhubs,          "%0 = vaddhub(%2, %3):sat")

/* Vector saturate half to unsigned byte */
FUNC_R_OP_P(vsathub,            "%0 = vsathub(%2)")

/* Similar to above but takes a 32-bit argument */
FUNC_R_OP_R(svsathub,           "%0 = vsathub(%2)")

/* Vector saturate word to unsigned half */
FUNC_P_OP_P(vsatwuh_nopack,     "%0 = vsatwuh(%2)")

/* Similar to above but returns a 32-bit result */
FUNC_R_OP_P(vsatwuh,            "%0 = vsatwuh(%2)")

/* Vector arithmetic shift halfwords with saturate and pack */
FUNC_R_OP_PI(asrhub_sat,        "%0 = vasrhub(%2, #%3):sat")

/* Vector arithmetic shift halfwords with round, saturate and pack */
FUNC_R_OP_PI(asrhub_rnd_sat,    "%0 = vasrhub(%2, #%3):raw")

FUNC_R_OP_RR(addsat,            "%0 = add(%2, %3):sat")
/* Similar to above but with register pairs */
FUNC_P_OP_PP(addpsat,           "%0 = add(%2, %3):sat")

/* Multiply and multiply-accumulate forms carrying the :sat modifier */
FUNC_XR_OP_RR(mpy_acc_sat_hh_s0, "%0 += mpy(%2.H, %3.H):sat")
FUNC_R_OP_RR(mpy_sat_hh_s1,     "%0 = mpy(%2.H, %3.H):<<1:sat")
FUNC_R_OP_RR(mpy_sat_rnd_hh_s1, "%0 = mpy(%2.H, %3.H):<<1:rnd:sat")
FUNC_R_OP_RR(mpy_up_s1_sat,     "%0 = mpy(%2, %3):<<1:sat")
FUNC_P_OP_RR(vmpy2s_s1,         "%0 = vmpyh(%2, %3):<<1:sat")
FUNC_P_OP_RR(vmpy2su_s1,        "%0 = vmpyhsu(%2, %3):<<1:sat")
FUNC_R_OP_RR(vmpy2s_s1pack,     "%0 = vmpyh(%2, %3):<<1:rnd:sat")
FUNC_P_OP_PP(vmpy2es_s1,        "%0 = vmpyeh(%2, %3):<<1:sat")
FUNC_R_OP_PP(vdmpyrs_s1,        "%0 = vdmpy(%2, %3):<<1:rnd:sat")
FUNC_XP_OP_PP(vdmacs_s0,        "%0 += vdmpy(%2, %3):sat")
FUNC_R_OP_RR(cmpyrs_s0,         "%0 = cmpy(%2, %3):rnd:sat")
FUNC_XP_OP_RR(cmacs_s0,         "%0 += cmpy(%2, %3):sat")
FUNC_XP_OP_RR(cnacs_s0,         "%0 -= cmpy(%2, %3):sat")
FUNC_P_OP_PP(vrcmpys_s1_h,      "%0 = vrcmpys(%2, %3):<<1:sat:raw:hi")
FUNC_XP_OP_PP(mmacls_s0,        "%0 += vmpyweh(%2, %3):sat")
FUNC_R_OP_RR(hmmpyl_rs1,        "%0 = mpy(%2, %3.L):<<1:rnd:sat")
FUNC_XP_OP_PP(mmaculs_s0,       "%0 += vmpyweuh(%2, %3):sat")
FUNC_R_OP_PR(cmpyi_wh,          "%0 = cmpyiwh(%2, %3):<<1:rnd:sat")
FUNC_P_OP_PP(vcmpy_s0_sat_i,    "%0 = vcmpyi(%2, %3):sat")
FUNC_P_OP_PR(vcrotate,          "%0 = vcrotate(%2, %3)")
FUNC_P_OP_PR(vcnegh,            "%0 = vcnegh(%2, %3)")

/* Only built when CORE_HAS_AUDIO is set */
#if CORE_HAS_AUDIO
FUNC_R_OP_PP(wcmpyrw,           "%0 = cmpyrw(%2, %3):<<1:sat")
#endif

FUNC_R_OP_RR(addh_l16_sat_ll,   "%0 = add(%2.L, %3.L):sat")
FUNC_P_OP_P(vconj,              "%0 = vconj(%2):sat")
FUNC_P_OP_PP(vxaddsubw,         "%0 = vxaddsubw(%2, %3):sat")
FUNC_P_OP_P(vabshsat,           "%0 = vabsh(%2):sat")
FUNC_P_OP_PP(vnavgwr,           "%0 = vnavgw(%2, %3):rnd:sat")
FUNC_R_OP_RI(round_ri_sat,      "%0 = round(%2, #%3):sat")
FUNC_R_OP_RR(asr_r_r_sat,       "%0 = asr(%2, %3):sat")
FUNC_R_OP_RR(asl_r_r_sat,       "%0 = asl(%2, %3):sat")

/* Reads and writes the destination pair and also produces predicate p2 */
FUNC_XPp_OP_PP(ACS,             "%0, p2 = vacsh(%3, %4)")
 435
/* Floating point */
FUNC_R_OP_RR(sfmin,             "%0 = sfmin(%2, %3)")
FUNC_R_OP_RR(sfmax,             "%0 = sfmax(%2, %3)")
FUNC_R_OP_RR(sfadd,             "%0 = sfadd(%2, %3)")
FUNC_R_OP_RR(sfsub,             "%0 = sfsub(%2, %3)")
FUNC_R_OP_RR(sfmpy,             "%0 = sfmpy(%2, %3)")
/* The accumulating forms read and write the result register */
FUNC_XR_OP_RR(sffma,            "%0 += sfmpy(%2, %3)")
FUNC_XR_OP_RR(sffms,            "%0 -= sfmpy(%2, %3)")
/* Compares leave their outcome in p1, which the template copies out */
FUNC_CMP_RR(sfcmpuo,            "p1 = sfcmp.uo(%2, %3)")
FUNC_CMP_RR(sfcmpeq,            "p1 = sfcmp.eq(%2, %3)")
FUNC_CMP_RR(sfcmpgt,            "p1 = sfcmp.gt(%2, %3)")
FUNC_CMP_RR(sfcmpge,            "p1 = sfcmp.ge(%2, %3)")

/* Same operations on 64-bit register pairs */
FUNC_P_OP_PP(dfadd,             "%0 = dfadd(%2, %3)")
FUNC_P_OP_PP(dfsub,             "%0 = dfsub(%2, %3)")

/* Only built when CORE_IS_V67 is set */
#if CORE_IS_V67
FUNC_P_OP_PP(dfmin,             "%0 = dfmin(%2, %3)")
FUNC_P_OP_PP(dfmax,             "%0 = dfmax(%2, %3)")
FUNC_XP_OP_PP(dfmpyhh,          "%0 += dfmpyhh(%2, %3)")
#endif

/* Compares leave their outcome in p1, which the template copies out */
FUNC_CMP_PP(dfcmpuo,            "p1 = dfcmp.uo(%2, %3)")
FUNC_CMP_PP(dfcmpeq,            "p1 = dfcmp.eq(%2, %3)")
FUNC_CMP_PP(dfcmpgt,            "p1 = dfcmp.gt(%2, %3)")
FUNC_CMP_PP(dfcmpge,            "p1 = dfcmp.ge(%2, %3)")
 462
/* Conversions from sf (32-bit source; the ":chop" forms are separate) */
FUNC_P_OP_R(conv_sf2df,         "%0 = convert_sf2df(%2)")
FUNC_R_OP_R(conv_sf2uw,         "%0 = convert_sf2uw(%2)")
FUNC_R_OP_R(conv_sf2w,          "%0 = convert_sf2w(%2)")
FUNC_P_OP_R(conv_sf2ud,         "%0 = convert_sf2ud(%2)")
FUNC_P_OP_R(conv_sf2d,          "%0 = convert_sf2d(%2)")
FUNC_R_OP_R(conv_sf2uw_chop,    "%0 = convert_sf2uw(%2):chop")
FUNC_R_OP_R(conv_sf2w_chop,     "%0 = convert_sf2w(%2):chop")
FUNC_P_OP_R(conv_sf2ud_chop,    "%0 = convert_sf2ud(%2):chop")
FUNC_P_OP_R(conv_sf2d_chop,     "%0 = convert_sf2d(%2):chop")

/* Conversions from df (64-bit source; the ":chop" forms are separate) */
FUNC_R_OP_P(conv_df2sf,         "%0 = convert_df2sf(%2)")
FUNC_R_OP_P(conv_df2uw,         "%0 = convert_df2uw(%2)")
FUNC_R_OP_P(conv_df2w,          "%0 = convert_df2w(%2)")
FUNC_P_OP_P(conv_df2ud,         "%0 = convert_df2ud(%2)")
FUNC_P_OP_P(conv_df2d,          "%0 = convert_df2d(%2)")
FUNC_R_OP_P(conv_df2uw_chop,    "%0 = convert_df2uw(%2):chop")
FUNC_R_OP_P(conv_df2w_chop,     "%0 = convert_df2w(%2):chop")
FUNC_P_OP_P(conv_df2ud_chop,    "%0 = convert_df2ud(%2):chop")
FUNC_P_OP_P(conv_df2d_chop,     "%0 = convert_df2d(%2):chop")

/* Integer to float conversions */
FUNC_R_OP_R(conv_uw2sf,         "%0 = convert_uw2sf(%2)")
FUNC_R_OP_R(conv_w2sf,          "%0 = convert_w2sf(%2)")
FUNC_R_OP_P(conv_ud2sf,         "%0 = convert_ud2sf(%2)")
FUNC_R_OP_P(conv_d2sf,          "%0 = convert_d2sf(%2)")

/*
 * Special purpose floating point instructions
 *
 * sffma_sc takes its predicate operand in p2, which the template loads
 * before the instruction.  sfrecipa and sfinvsqrta produce a predicate
 * in p2, which the templates copy out after the instruction.
 */
FUNC_XR_OP_RRp(sffma_sc,        "%0 += sfmpy(%2, %3, p2):scale")
FUNC_Rp_OP_RR(sfrecipa,         "%0, p2 = sfrecipa(%3, %4)")
FUNC_R_OP_RR(sffixupn,          "%0 = sffixupn(%2, %3)")
FUNC_R_OP_RR(sffixupd,          "%0 = sffixupd(%2, %3)")
FUNC_R_OP_R(sffixupr,           "%0 = sffixupr(%2)")
FUNC_Rp_OP_R(sfinvsqrta,        "%0, p2 = sfinvsqrta(%3)")
 498
 499/*
 500 * Templates for test cases
 501 *
 502 * Same naming convention as the function templates
 503 */
 504#define TEST_x_OP_x(RESTYPE, CHECKFN, SRCTYPE, FUNC, SRC, RES, USR_RES) \
 505    do { \
 506        RESTYPE result; \
 507        SRCTYPE src = SRC; \
 508        uint32_t usr_result; \
 509        result = FUNC(src, &usr_result); \
 510        CHECKFN(result, RES); \
 511        check(usr_result, USR_RES); \
 512    } while (0)
 513
 514#define TEST_R_OP_R(FUNC, SRC, RES, USR_RES) \
 515TEST_x_OP_x(uint32_t, check32, uint32_t, FUNC, SRC, RES, USR_RES)
 516
 517#define TEST_R_OP_P(FUNC, SRC, RES, USR_RES) \
 518TEST_x_OP_x(uint32_t, check32, uint64_t, FUNC, SRC, RES, USR_RES)
 519
 520#define TEST_P_OP_P(FUNC, SRC, RES, USR_RES) \
 521TEST_x_OP_x(uint64_t, check64, uint64_t, FUNC, SRC, RES, USR_RES)
 522
 523#define TEST_P_OP_R(FUNC, SRC, RES, USR_RES) \
 524TEST_x_OP_x(uint64_t, check64, uint32_t, FUNC, SRC, RES, USR_RES)
 525
 526#define TEST_xp_OP_x(RESTYPE, CHECKFN, SRCTYPE, FUNC, SRC, \
 527                     RES, PRED_RES, USR_RES) \
 528    do { \
 529        RESTYPE result; \
 530        SRCTYPE src = SRC; \
 531        uint8_t pred_result; \
 532        uint32_t usr_result; \
 533        result = FUNC(src, &pred_result, &usr_result); \
 534        CHECKFN(result, RES); \
 535        check(pred_result, PRED_RES); \
 536        check(usr_result, USR_RES); \
 537    } while (0)
 538
 539#define TEST_Rp_OP_R(FUNC, SRC, RES, PRED_RES, USR_RES) \
 540TEST_xp_OP_x(uint32_t, check32, uint32_t, FUNC, SRC, RES, PRED_RES, USR_RES)
 541
 542#define TEST_x_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \
 543                     FUNC, SRC1, SRC2, RES, USR_RES) \
 544    do { \
 545        RESTYPE result; \
 546        SRC1TYPE src1 = SRC1; \
 547        SRC2TYPE src2 = SRC2; \
 548        uint32_t usr_result; \
 549        result = FUNC(src1, src2, &usr_result); \
 550        CHECKFN(result, RES); \
 551        check(usr_result, USR_RES); \
 552    } while (0)
 553
 554#define TEST_P_OP_PP(FUNC, SRC1, SRC2, RES, USR_RES) \
 555TEST_x_OP_xx(uint64_t, check64, uint64_t, uint64_t, \
 556             FUNC, SRC1, SRC2, RES, USR_RES)
 557
 558#define TEST_R_OP_PP(FUNC, SRC1, SRC2, RES, USR_RES) \
 559TEST_x_OP_xx(uint32_t, check32, uint64_t, uint64_t, \
 560             FUNC, SRC1, SRC2, RES, USR_RES)
 561
 562#define TEST_P_OP_RR(FUNC, SRC1, SRC2, RES, USR_RES) \
 563TEST_x_OP_xx(uint64_t, check64, uint32_t, uint32_t, \
 564             FUNC, SRC1, SRC2, RES, USR_RES)
 565
 566#define TEST_R_OP_RR(FUNC, SRC1, SRC2, RES, USR_RES) \
 567TEST_x_OP_xx(uint32_t, check32, uint32_t, uint32_t, \
 568             FUNC, SRC1, SRC2, RES, USR_RES)
 569
 570#define TEST_R_OP_PR(FUNC, SRC1, SRC2, RES, USR_RES) \
 571TEST_x_OP_xx(uint32_t, check32, uint64_t, uint32_t, \
 572             FUNC, SRC1, SRC2, RES, USR_RES)
 573
 574#define TEST_P_OP_PR(FUNC, SRC1, SRC2, RES, USR_RES) \
 575TEST_x_OP_xx(uint64_t, check64, uint64_t, uint32_t, \
 576             FUNC, SRC1, SRC2, RES, USR_RES)
 577
 578#define TEST_xp_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, FUNC, SRC1, SRC2, \
 579                      RES, PRED_RES, USR_RES) \
 580    do { \
 581        RESTYPE result; \
 582        SRC1TYPE src1 = SRC1; \
 583        SRC2TYPE src2 = SRC2; \
 584        uint8_t pred_result; \
 585        uint32_t usr_result; \
 586        result = FUNC(src1, src2, &pred_result, &usr_result); \
 587        CHECKFN(result, RES); \
 588        check(pred_result, PRED_RES); \
 589        check(usr_result, USR_RES); \
 590    } while (0)
 591
 592#define TEST_Rp_OP_RR(FUNC, SRC1, SRC2, RES, PRED_RES, USR_RES) \
 593TEST_xp_OP_xx(uint32_t, check32, uint32_t, uint32_t, FUNC, SRC1, SRC2, \
 594              RES, PRED_RES, USR_RES)
 595
 596#define TEST_x_OP_xI(RESTYPE, CHECKFN, SRC1TYPE, \
 597                     FUNC, SRC1, SRC2, RES, USR_RES) \
 598    do { \
 599        RESTYPE result; \
 600        SRC1TYPE src1 = SRC1; \
 601        uint32_t src2 = SRC2; \
 602        uint32_t usr_result; \
 603        result = FUNC(src1, src2, &usr_result); \
 604        CHECKFN(result, RES); \
 605        check(usr_result, USR_RES); \
 606    } while (0)
 607
 608#define TEST_R_OP_RI(FUNC, SRC1, SRC2, RES, USR_RES) \
 609TEST_x_OP_xI(uint32_t, check32, uint32_t, \
 610             FUNC, SRC1, SRC2, RES, USR_RES)
 611
 612#define TEST_R_OP_PI(FUNC, SRC1, SRC2, RES, USR_RES) \
 613TEST_x_OP_xI(uint32_t, check64, uint64_t, \
 614             FUNC, SRC1, SRC2, RES, USR_RES)
 615
 616#define TEST_Xx_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \
 617                      FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \
 618    do { \
 619        RESTYPE result = RESIN; \
 620        SRC1TYPE src1 = SRC1; \
 621        SRC2TYPE src2 = SRC2; \
 622        uint32_t usr_result; \
 623        result = FUNC(result, src1, src2, &usr_result); \
 624        CHECKFN(result, RES); \
 625        check(usr_result, USR_RES); \
 626    } while (0)
 627
 628#define TEST_XR_OP_RR(FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \
 629TEST_Xx_OP_xx(uint32_t, check32, uint32_t, uint32_t, \
 630              FUNC, RESIN, SRC1, SRC2, RES, USR_RES)
 631
 632#define TEST_XP_OP_PP(FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \
 633TEST_Xx_OP_xx(uint64_t, check64, uint64_t, uint64_t, \
 634              FUNC, RESIN, SRC1, SRC2, RES, USR_RES)
 635
 636#define TEST_XP_OP_RR(FUNC, RESIN, SRC1, SRC2, RES, USR_RES) \
 637TEST_Xx_OP_xx(uint64_t, check64, uint32_t, uint32_t, \
 638              FUNC, RESIN, SRC1, SRC2, RES, USR_RES)
 639
 640#define TEST_Xxp_OP_xx(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \
 641                       FUNC, RESIN, SRC1, SRC2, RES, PRED_RES, USR_RES) \
 642    do { \
 643        RESTYPE result = RESIN; \
 644        SRC1TYPE src1 = SRC1; \
 645        SRC2TYPE src2 = SRC2; \
 646        uint8_t pred_res; \
 647        uint32_t usr_result; \
 648        result = FUNC(result, src1, src2, &pred_res, &usr_result); \
 649        CHECKFN(result, RES); \
 650        check(usr_result, USR_RES); \
 651    } while (0)
 652
 653#define TEST_XPp_OP_PP(FUNC, RESIN, SRC1, SRC2, RES, PRED_RES, USR_RES) \
 654TEST_Xxp_OP_xx(uint64_t, check64, uint64_t, uint64_t, FUNC, RESIN, SRC1, SRC2, \
 655               RES, PRED_RES, USR_RES)
 656
 657#define TEST_Xx_OP_xxp(RESTYPE, CHECKFN, SRC1TYPE, SRC2TYPE, \
 658                      FUNC, RESIN, SRC1, SRC2, PRED, RES, USR_RES) \
 659    do { \
 660        RESTYPE result = RESIN; \
 661        SRC1TYPE src1 = SRC1; \
 662        SRC2TYPE src2 = SRC2; \
 663        uint8_t pred = PRED; \
 664        uint32_t usr_result; \
 665        result = FUNC(result, src1, src2, pred, &usr_result); \
 666        CHECKFN(result, RES); \
 667        check(usr_result, USR_RES); \
 668    } while (0)
 669
 670#define TEST_XR_OP_RRp(FUNC, RESIN, SRC1, SRC2, PRED, RES, USR_RES) \
 671TEST_Xx_OP_xxp(uint32_t, check32, uint32_t, uint32_t, \
 672              FUNC, RESIN, SRC1, SRC2, PRED, RES, USR_RES)
 673
 674#define TEST_CMP_xx(SRC1TYPE, SRC2TYPE, \
 675                    FUNC, SRC1, SRC2, RES, USR_RES) \
 676    do { \
 677        uint32_t result; \
 678        SRC1TYPE src1 = SRC1; \
 679        SRC2TYPE src2 = SRC2; \
 680        uint32_t usr_result; \
 681        result = FUNC(src1, src2, &usr_result); \
 682        check(result, RES); \
 683        check(usr_result, USR_RES); \
 684    } while (0)
 685
 686#define TEST_CMP_RR(FUNC, SRC1, SRC2, RES, USR_RES) \
 687TEST_CMP_xx(uint32_t, uint32_t, FUNC, SRC1, SRC2, RES, USR_RES)
 688
 689#define TEST_CMP_PP(FUNC, SRC1, SRC2, RES, USR_RES) \
 690TEST_CMP_xx(uint64_t, uint64_t, FUNC, SRC1, SRC2, RES, USR_RES)
 691
 692int main()
 693{
 694    TEST_R_OP_R(satub,       0,         0,         USR_CLEAR);
 695    TEST_R_OP_R(satub,       0xff,      0xff,      USR_CLEAR);
 696    TEST_R_OP_R(satub,       0xfff,     0xff,      USR_OVF);
 697    TEST_R_OP_R(satub,       -1,        0,         USR_OVF);
 698
 699    TEST_P_OP_PP(vaddubs,    0xfeLL,    0x01LL,    0xffLL,    USR_CLEAR);
 700    TEST_P_OP_PP(vaddubs,    0xffLL,    0xffLL,    0xffLL,    USR_OVF);
 701
 702    TEST_P_OP_PP(vadduhs,    0xfffeLL,  0x1LL,     0xffffLL,  USR_CLEAR);
 703    TEST_P_OP_PP(vadduhs,    0xffffLL,  0x1LL,     0xffffLL,  USR_OVF);
 704
 705    TEST_P_OP_PP(vsububs, 0x0807060504030201LL, 0x0101010101010101LL,
 706                 0x0706050403020100LL, USR_CLEAR);
 707    TEST_P_OP_PP(vsububs, 0x0807060504030201LL, 0x0202020202020202LL,
 708                 0x0605040302010000LL, USR_OVF);
 709
 710    TEST_P_OP_PP(vsubuhs, 0x0004000300020001LL, 0x0001000100010001LL,
 711                 0x0003000200010000LL, USR_CLEAR);
 712    TEST_P_OP_PP(vsubuhs, 0x0004000300020001LL, 0x0002000200020002LL,
 713                 0x0002000100000000LL, USR_OVF);
 714
 715    TEST_R_OP_PP(vaddhubs, 0x0004000300020001LL, 0x0001000100010001LL,
 716                 0x05040302, USR_CLEAR);
 717    TEST_R_OP_PP(vaddhubs, 0x7fff000300020001LL, 0x0002000200020002LL,
 718                 0xff050403, USR_OVF);
 719
 720    TEST_R_OP_P(vsathub,         0x0001000300020001LL, 0x01030201, USR_CLEAR);
 721    TEST_R_OP_P(vsathub,         0x010000700080ffffLL, 0xff708000, USR_OVF);
 722
 723    TEST_R_OP_P(vsatwuh,         0x0000ffff00000001LL, 0xffff0001, USR_CLEAR);
 724    TEST_R_OP_P(vsatwuh,         0x800000000000ffffLL, 0x0000ffff, USR_OVF);
 725
 726    TEST_P_OP_P(vsatwuh_nopack,  0x0000ffff00000001LL, 0x0000ffff00000001LL,
 727                USR_CLEAR);
 728    TEST_P_OP_P(vsatwuh_nopack,  0x800000000000ffffLL, 0x000000000000ffffLL,
 729                USR_OVF);
 730
 731    TEST_R_OP_R(svsathub,        0x00020001,           0x0201,     USR_CLEAR);
 732    TEST_R_OP_R(svsathub,        0x0080ffff,           0x8000,     USR_OVF);
 733
 734    TEST_R_OP_PI(asrhub_sat,     0x004f003f002f001fLL, 3,    0x09070503,
 735                 USR_CLEAR);
 736    TEST_R_OP_PI(asrhub_sat,     0x004fffff8fff001fLL, 3,    0x09000003,
 737                 USR_OVF);
 738
 739    TEST_R_OP_PI(asrhub_rnd_sat, 0x004f003f002f001fLL, 2,    0x0a080604,
 740                 USR_CLEAR);
 741    TEST_R_OP_PI(asrhub_rnd_sat, 0x004fffff8fff001fLL, 2,    0x0a000004,
 742                 USR_OVF);
 743
 744    TEST_R_OP_RR(addsat,        1,              2,              3,
 745                 USR_CLEAR);
 746    TEST_R_OP_RR(addsat,        0x7fffffff,     0x00000010,     0x7fffffff,
 747                 USR_OVF);
 748    TEST_R_OP_RR(addsat,        0x80000000,     0x80000006,     0x80000000,
 749                 USR_OVF);
 750
 751    TEST_P_OP_PP(addpsat, 1LL, 2LL, 3LL, USR_CLEAR);
 752    /* overflow to max positive */
 753    TEST_P_OP_PP(addpsat, 0x7ffffffffffffff0LL, 0x0000000000000010LL,
 754                 0x7fffffffffffffffLL, USR_OVF);
 755    /* overflow to min negative */
 756    TEST_P_OP_PP(addpsat, 0x8000000000000003LL, 0x8000000000000006LL,
 757                 0x8000000000000000LL, USR_OVF);
 758
 759    TEST_XR_OP_RR(mpy_acc_sat_hh_s0, 0x7fffffff, 0xffff0000, 0x11110000,
 760                  0x7fffeeee, USR_CLEAR);
 761    TEST_XR_OP_RR(mpy_acc_sat_hh_s0, 0x7fffffff, 0x7fff0000, 0x7fff0000,
 762                  0x7fffffff, USR_OVF);
 763
 764    TEST_R_OP_RR(mpy_sat_hh_s1,        0xffff0000, 0x11110000, 0xffffddde,
 765                 USR_CLEAR);
 766    TEST_R_OP_RR(mpy_sat_hh_s1,        0x7fff0000, 0x7fff0000, 0x7ffe0002,
 767                 USR_CLEAR);
 768    TEST_R_OP_RR(mpy_sat_hh_s1,        0x80000000, 0x80000000, 0x7fffffff,
 769                 USR_OVF);
 770
 771    TEST_R_OP_RR(mpy_sat_rnd_hh_s1,    0xffff0000, 0x11110000, 0x00005dde,
 772                 USR_CLEAR);
 773    TEST_R_OP_RR(mpy_sat_rnd_hh_s1,    0x7fff0000, 0x7fff0000, 0x7ffe8002,
 774                 USR_CLEAR);
 775    TEST_R_OP_RR(mpy_sat_rnd_hh_s1,    0x80000000, 0x80000000, 0x7fffffff,
 776                 USR_OVF);
 777
 778    TEST_R_OP_RR(mpy_up_s1_sat,        0xffff0000, 0x11110000, 0xffffddde,
 779                 USR_CLEAR);
 780    TEST_R_OP_RR(mpy_up_s1_sat,        0x7fff0000, 0x7fff0000, 0x7ffe0002,
 781                 USR_CLEAR);
 782    TEST_R_OP_RR(mpy_up_s1_sat,        0x80000000, 0x80000000, 0x7fffffff,
 783                 USR_OVF);
 784
 785    TEST_P_OP_RR(vmpy2s_s1,  0x7fff0000, 0x7fff0000, 0x7ffe000200000000LL,
 786                 USR_CLEAR);
 787    TEST_P_OP_RR(vmpy2s_s1,  0x80000000, 0x80000000, 0x7fffffff00000000LL,
 788                 USR_OVF);
 789
 790    TEST_P_OP_RR(vmpy2su_s1, 0x7fff0000, 0x7fff0000, 0x7ffe000200000000LL,
 791                 USR_CLEAR);
 792    TEST_P_OP_RR(vmpy2su_s1, 0xffffbd97, 0xffffffff, 0xfffe000280000000LL,
 793                 USR_OVF);
 794
 795    TEST_R_OP_RR(vmpy2s_s1pack,        0x7fff0000, 0x7fff0000, 0x7ffe0000,
 796                 USR_CLEAR);
 797    TEST_R_OP_RR(vmpy2s_s1pack,        0x80008000, 0x80008000, 0x7fff7fff,
 798                 USR_OVF);
 799
 800    TEST_P_OP_PP(vmpy2es_s1, 0x7fff7fff7fff7fffLL, 0x1fff1fff1fff1fffLL,
 801                 0x1ffec0021ffec002LL, USR_CLEAR);
 802    TEST_P_OP_PP(vmpy2es_s1, 0x8000800080008000LL, 0x8000800080008000LL,
 803                 0x7fffffff7fffffffLL, USR_OVF);
 804
 805    TEST_R_OP_PP(vdmpyrs_s1, 0x7fff7fff7fff7fffLL, 0x1fff1fff1fff1fffLL,
 806                 0x3ffe3ffe, USR_CLEAR);
 807    TEST_R_OP_PP(vdmpyrs_s1, 0x8000800080008000LL, 0x8000800080008000LL,
 808                 0x7fff7fffLL, USR_OVF);
 809
 810    TEST_XP_OP_PP(vdmacs_s0, 0x0fffffffULL, 0x00ff00ff00ff00ffLL,
 811                  0x00ff00ff00ff00ffLL, 0x0001fc021001fc01LL, USR_CLEAR);
 812    TEST_XP_OP_PP(vdmacs_s0, 0x01111111ULL, 0x8000800080001000LL,
 813                  0x8000800080008000LL, 0x7fffffff39111111LL, USR_OVF);
 814
 815    TEST_R_OP_RR(cmpyrs_s0,            0x7fff0000, 0x7fff0000, 0x0000c001,
 816                 USR_CLEAR);
 817    TEST_R_OP_RR(cmpyrs_s0,            0x80008000, 0x80008000, 0x7fff0000,
 818                 USR_OVF);
 819
 820    TEST_XP_OP_RR(cmacs_s0, 0x0fffffff, 0x7fff0000, 0x7fff0000,
 821                  0x00000000d000fffeLL, USR_CLEAR);
 822    TEST_XP_OP_RR(cmacs_s0, 0x0fff1111, 0x80008000, 0x80008000,
 823                  0x7fffffff0fff1111LL, USR_OVF);
 824
 825    TEST_XP_OP_RR(cnacs_s0, 0x000000108fffffffULL, 0x7fff0000, 0x7fff0000,
 826                  0x00000010cfff0000ULL, USR_CLEAR);
 827    TEST_XP_OP_RR(cnacs_s0, 0x000000108ff1111fULL, 0x00002001, 0x00007ffd,
 828                  0x0000001080000000ULL, USR_OVF);
 829
 830    TEST_P_OP_PP(vrcmpys_s1_h, 0x00ff00ff00ff00ffLL, 0x00ff00ff00ff00ffLL,
 831                 0x0003f8040003f804LL, USR_CLEAR);
 832    TEST_P_OP_PP(vrcmpys_s1_h, 0x8000800080008000LL, 0x8000800080008000LL,
 833                 0x7fffffff7fffffffLL, USR_OVF);
 834
 835    TEST_XP_OP_PP(mmacls_s0, 0x6fffffff, 0x00ff00ff00ff00ffLL,
 836                  0x00ff00ff00ff00ffLL, 0x0000fe017000fe00LL, USR_CLEAR);
 837    TEST_XP_OP_PP(mmacls_s0, 0x6f1111ff, 0x8000800080008000LL,
 838                  0x1000100080008000LL, 0xf80008007fffffffLL, USR_OVF);
 839
 840    TEST_R_OP_RR(hmmpyl_rs1,           0x7fff0000, 0x7fff0001, 0x0000fffe,
 841                 USR_CLEAR);
 842    TEST_R_OP_RR(hmmpyl_rs1,           0x80000000, 0x80008000, 0x7fffffff,
 843                 USR_OVF);
 844
 845    TEST_XP_OP_PP(mmaculs_s0, 0x000000007fffffffULL, 0xffff800080008000LL,
 846                  0xffff800080008000LL, 0xffffc00040003fffLL, USR_CLEAR);
 847    TEST_XP_OP_PP(mmaculs_s0, 0x000011107fffffffULL, 0x00ff00ff00ff00ffLL,
 848                  0x00ff00ff001100ffLL, 0x00010f117fffffffLL, USR_OVF);
 849
 850    TEST_R_OP_PR(cmpyi_wh, 0x7fff000000000000LL, 0x7fff0001, 0x0000fffe,
 851                 USR_CLEAR);
 852    TEST_R_OP_PR(cmpyi_wh, 0x8000000000000000LL, 0x80008000, 0x7fffffff,
 853                 USR_OVF);
 854
 855    TEST_P_OP_PP(vcmpy_s0_sat_i, 0x00ff00ff00ff00ffLL, 0x00ff00ff00ff00ffLL,
 856                 0x0001fc020001fc02LL, USR_CLEAR);
 857    TEST_P_OP_PP(vcmpy_s0_sat_i, 0x8000800080008000LL, 0x8000800080008000LL,
 858                 0x7fffffff7fffffffLL, USR_OVF);
 859
 860    TEST_P_OP_PR(vcrotate, 0x8000000000000000LL, 0x00000002,
 861                 0x8000000000000000LL, USR_CLEAR);
 862    TEST_P_OP_PR(vcrotate, 0x7fff80007fff8000LL, 0x00000001,
 863                 0x7fff80007fff7fffLL, USR_OVF);
 864
 865    TEST_P_OP_PR(vcnegh, 0x8000000000000000LL, 0x00000002,
 866                 0x8000000000000000LL, USR_CLEAR);
 867    TEST_P_OP_PR(vcnegh, 0x7fff80007fff8000LL, 0x00000001,
 868                 0x7fff80007fff7fffLL, USR_OVF);
 869
 870#if CORE_HAS_AUDIO
 871    TEST_R_OP_PP(wcmpyrw, 0x8765432101234567LL, 0x00000002ffffffffLL,
 872                 0x00000001, USR_CLEAR);
 873    TEST_R_OP_PP(wcmpyrw, 0x800000007fffffffLL, 0x000000ff7fffffffLL,
 874                 0x7fffffff, USR_OVF);
 875    TEST_R_OP_PP(wcmpyrw, 0x7fffffff80000000LL, 0x7fffffff000000ffLL,
 876                 0x80000000, USR_OVF);
 877#else
 878    printf("Audio instructions skipped\n");
 879#endif
 880
 881    TEST_R_OP_RR(addh_l16_sat_ll,      0x0000ffff, 0x00000002, 0x00000001,
 882                 USR_CLEAR);
 883    TEST_R_OP_RR(addh_l16_sat_ll,      0x00007fff, 0x00000005, 0x00007fff,
 884                 USR_OVF);
 885    TEST_R_OP_RR(addh_l16_sat_ll,      0x00008000, 0x00008000, 0xffff8000,
 886                 USR_OVF);
 887
 888    TEST_P_OP_P(vconj, 0x0000ffff00000001LL, 0x0000ffff00000001LL, USR_CLEAR);
 889    TEST_P_OP_P(vconj, 0x800000000000ffffLL, 0x7fff00000000ffffLL, USR_OVF);
 890
 891    TEST_P_OP_PP(vxaddsubw, 0x8765432101234567LL, 0x00000002ffffffffLL,
 892                 0x8765432201234569LL, USR_CLEAR);
 893    TEST_P_OP_PP(vxaddsubw, 0x7fffffff7fffffffLL, 0xffffffffffffffffLL,
 894                 0x7fffffff7ffffffeLL, USR_OVF);
 895    TEST_P_OP_PP(vxaddsubw, 0x800000000fffffffLL, 0x0000000a00000008LL,
 896                 0x8000000010000009LL, USR_OVF);
 897
 898    TEST_P_OP_P(vabshsat, 0x0001000afffff800LL, 0x0001000a00010800LL,
 899                USR_CLEAR);
 900    TEST_P_OP_P(vabshsat, 0x8000000b000c000aLL, 0x7fff000b000c000aLL,
 901             USR_OVF);
 902
 903    TEST_P_OP_PP(vnavgwr, 0x8765432101234567LL, 0x00000002ffffffffLL,
 904                 0xc3b2a1900091a2b4LL, USR_CLEAR);
 905    TEST_P_OP_PP(vnavgwr, 0x7fffffff8000000aLL, 0x80000000ffffffffLL,
 906                 0x7fffffffc0000006LL, USR_OVF);
 907
 908    TEST_R_OP_RI(round_ri_sat,         0x0000ffff, 2, 0x00004000, USR_CLEAR);
 909    TEST_R_OP_RI(round_ri_sat,         0x7fffffff, 2, 0x1fffffff, USR_OVF);
 910
 911    TEST_R_OP_RR(asr_r_r_sat,  0x0000ffff, 0x02, 0x00003fff, USR_CLEAR);
 912    TEST_R_OP_RR(asr_r_r_sat,  0x80000000, 0x01, 0xc0000000, USR_CLEAR);
 913    TEST_R_OP_RR(asr_r_r_sat,  0xffffffff, 0x01, 0xffffffff, USR_CLEAR);
 914    TEST_R_OP_RR(asr_r_r_sat,  0x00ffffff, 0xf5, 0x7fffffff, USR_OVF);
 915    TEST_R_OP_RR(asr_r_r_sat,  0x80000000, 0xf5, 0x80000000, USR_OVF);
 916    TEST_R_OP_RR(asr_r_r_sat,  0x7fff0000, 0x42, 0x7fffffff, USR_OVF);
 917    TEST_R_OP_RR(asr_r_r_sat,  0xff000000, 0x42, 0x80000000, USR_OVF);
 918    TEST_R_OP_RR(asr_r_r_sat,        4096,   32, 0x00000000, USR_CLEAR);
 919    TEST_R_OP_RR(asr_r_r_sat,        4096,  -32, 0x7fffffff, USR_OVF);
 920    TEST_R_OP_RR(asr_r_r_sat,       -4096,   32, 0xffffffff, USR_CLEAR);
 921    TEST_R_OP_RR(asr_r_r_sat,       -4096,  -32, 0x80000000, USR_OVF);
 922    TEST_R_OP_RR(asr_r_r_sat,           0,  -32, 0x00000000, USR_CLEAR);
 923    TEST_R_OP_RR(asr_r_r_sat,           1,  -32, 0x7fffffff, USR_OVF);
 924
 925    TEST_R_OP_RR(asl_r_r_sat,  0x00000000, 0x40, 0x00000000, USR_CLEAR);
 926    TEST_R_OP_RR(asl_r_r_sat,  0x80000000, 0xff, 0xc0000000, USR_CLEAR);
 927    TEST_R_OP_RR(asl_r_r_sat,  0xffffffff, 0xff, 0xffffffff, USR_CLEAR);
 928    TEST_R_OP_RR(asl_r_r_sat,  0x00ffffff, 0x0b, 0x7fffffff, USR_OVF);
 929    TEST_R_OP_RR(asl_r_r_sat,  0x80000000, 0x0b, 0x80000000, USR_OVF);
 930    TEST_R_OP_RR(asl_r_r_sat,  0x7fff0000, 0xbe, 0x7fffffff, USR_OVF);
 931    TEST_R_OP_RR(asl_r_r_sat,  0xff000000, 0xbe, 0x80000000, USR_OVF);
 932    TEST_R_OP_RR(asl_r_r_sat,        4096,   32, 0x7fffffff, USR_OVF);
 933    TEST_R_OP_RR(asl_r_r_sat,        4096,  -32, 0x00000000, USR_CLEAR);
 934    TEST_R_OP_RR(asl_r_r_sat,       -4096,   32, 0x80000000, USR_OVF);
 935    TEST_R_OP_RR(asl_r_r_sat,       -4096,  -32, 0xffffffff, USR_CLEAR);
 936    TEST_R_OP_RR(asl_r_r_sat,           0,   32, 0x00000000, USR_CLEAR);
 937    TEST_R_OP_RR(asl_r_r_sat,           1,   32, 0x7fffffff, USR_OVF);
 938
 939    TEST_XPp_OP_PP(ACS, 0x0004000300020001ULL, 0x0001000200030004ULL,
 940                   0x0000000000000000ULL, 0x0004000300030004ULL, 0xf0,
 941                   USR_CLEAR);
 942    TEST_XPp_OP_PP(ACS, 0x0004000300020001ULL, 0x0001000200030004ULL,
 943                   0x000affff000d0000ULL, 0x000e0003000f0004ULL, 0xcc,
 944                   USR_CLEAR);
 945    TEST_XPp_OP_PP(ACS, 0x00047fff00020001ULL, 0x00017fff00030004ULL,
 946                  0x000a0fff000d0000ULL, 0x000e7fff000f0004ULL, 0xfc,
 947                  USR_OVF);
 948    TEST_XPp_OP_PP(ACS, 0x00047fff00020001ULL, 0x00017fff00030004ULL,
 949                   0x000a0fff000d0000ULL, 0x000e7fff000f0004ULL, 0xf0,
 950                   USR_OVF);
 951
 952    /* Floating point */
 953    TEST_R_OP_RR(sfmin,  SF_one,      SF_small_neg,   SF_small_neg, USR_CLEAR);
 954    TEST_R_OP_RR(sfmin,  SF_one,      SF_SNaN,        SF_one,       USR_FPINVF);
 955    TEST_R_OP_RR(sfmin,  SF_SNaN,     SF_one,         SF_one,       USR_FPINVF);
 956    TEST_R_OP_RR(sfmin,  SF_one,      SF_QNaN,        SF_one,       USR_CLEAR);
 957    TEST_R_OP_RR(sfmin,  SF_QNaN,     SF_one,         SF_one,       USR_CLEAR);
 958    TEST_R_OP_RR(sfmin,  SF_SNaN,     SF_QNaN,        SF_HEX_NaN,   USR_FPINVF);
 959    TEST_R_OP_RR(sfmin,  SF_QNaN,     SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
 960    TEST_R_OP_RR(sfmin,  SF_zero,     SF_zero_neg,    SF_zero_neg,  USR_CLEAR);
 961    TEST_R_OP_RR(sfmin,  SF_zero_neg, SF_zero,        SF_zero_neg,  USR_CLEAR);
 962
 963    TEST_R_OP_RR(sfmax,  SF_one,      SF_small_neg,   SF_one,       USR_CLEAR);
 964    TEST_R_OP_RR(sfmax,  SF_one,      SF_SNaN,        SF_one,       USR_FPINVF);
 965    TEST_R_OP_RR(sfmax,  SF_SNaN,     SF_one,         SF_one,       USR_FPINVF);
 966    TEST_R_OP_RR(sfmax,  SF_one,      SF_QNaN,        SF_one,       USR_CLEAR);
 967    TEST_R_OP_RR(sfmax,  SF_QNaN,     SF_one,         SF_one,       USR_CLEAR);
 968    TEST_R_OP_RR(sfmax,  SF_SNaN,     SF_QNaN,        SF_HEX_NaN,   USR_FPINVF);
 969    TEST_R_OP_RR(sfmax,  SF_QNaN,     SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
 970    TEST_R_OP_RR(sfmax,  SF_zero,     SF_zero_neg,    SF_zero,      USR_CLEAR);
 971    TEST_R_OP_RR(sfmax,  SF_zero_neg, SF_zero,        SF_zero,      USR_CLEAR);
 972
 973    TEST_R_OP_RR(sfadd,  SF_one,      SF_QNaN,        SF_HEX_NaN,   USR_CLEAR);
 974    TEST_R_OP_RR(sfadd,  SF_one,      SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
 975    TEST_R_OP_RR(sfadd,  SF_QNaN,     SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
 976    TEST_R_OP_RR(sfadd,  SF_SNaN,     SF_QNaN,        SF_HEX_NaN,   USR_FPINVF);
 977
 978    TEST_R_OP_RR(sfsub,  SF_one,      SF_QNaN,        SF_HEX_NaN,   USR_CLEAR);
 979    TEST_R_OP_RR(sfsub,  SF_one,      SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
 980    TEST_R_OP_RR(sfsub,  SF_QNaN,     SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
 981    TEST_R_OP_RR(sfsub,  SF_SNaN,     SF_QNaN,        SF_HEX_NaN,   USR_FPINVF);
 982
 983    TEST_R_OP_RR(sfmpy,  SF_one,      SF_QNaN,        SF_HEX_NaN,   USR_CLEAR);
 984    TEST_R_OP_RR(sfmpy,  SF_one,      SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
 985    TEST_R_OP_RR(sfmpy,  SF_QNaN,     SF_SNaN,        SF_HEX_NaN,   USR_FPINVF);
 986    TEST_R_OP_RR(sfmpy,  SF_SNaN,     SF_QNaN,        SF_HEX_NaN,   USR_FPINVF);
 987
 988    TEST_XR_OP_RR(sffma, SF_one,   SF_one,    SF_one,   SF_two,     USR_CLEAR);
 989    TEST_XR_OP_RR(sffma, SF_zero,  SF_one,    SF_QNaN,  SF_HEX_NaN, USR_CLEAR);
 990    TEST_XR_OP_RR(sffma, SF_zero,  SF_one,    SF_SNaN,  SF_HEX_NaN, USR_FPINVF);
 991    TEST_XR_OP_RR(sffma, SF_zero,  SF_QNaN,   SF_SNaN,  SF_HEX_NaN, USR_FPINVF);
 992    TEST_XR_OP_RR(sffma, SF_zero,  SF_SNaN,   SF_QNaN,  SF_HEX_NaN, USR_FPINVF);
 993
 994    TEST_XR_OP_RR(sffms, SF_one,   SF_one,    SF_one,   SF_zero,    USR_CLEAR);
 995    TEST_XR_OP_RR(sffms, SF_zero,  SF_one,    SF_QNaN,  SF_HEX_NaN, USR_CLEAR);
 996    TEST_XR_OP_RR(sffms, SF_zero,  SF_one,    SF_SNaN,  SF_HEX_NaN, USR_FPINVF);
 997    TEST_XR_OP_RR(sffms, SF_zero,  SF_QNaN,   SF_SNaN,  SF_HEX_NaN, USR_FPINVF);
 998    TEST_XR_OP_RR(sffms, SF_zero,  SF_SNaN,   SF_QNaN,  SF_HEX_NaN, USR_FPINVF);
 999
1000    TEST_CMP_RR(sfcmpuo, SF_one,      SF_large_pos,    0x00,    USR_CLEAR);
1001    TEST_CMP_RR(sfcmpuo, SF_INF,      SF_large_pos,    0x00,    USR_CLEAR);
1002    TEST_CMP_RR(sfcmpuo, SF_QNaN,     SF_large_pos,    0xff,    USR_CLEAR);
1003    TEST_CMP_RR(sfcmpuo, SF_QNaN_neg, SF_large_pos,    0xff,    USR_CLEAR);
1004    TEST_CMP_RR(sfcmpuo, SF_SNaN,     SF_large_pos,    0xff,    USR_FPINVF);
1005    TEST_CMP_RR(sfcmpuo, SF_SNaN_neg, SF_large_pos,    0xff,    USR_FPINVF);
1006    TEST_CMP_RR(sfcmpuo, SF_QNaN,     SF_QNaN,         0xff,    USR_CLEAR);
1007    TEST_CMP_RR(sfcmpuo, SF_QNaN,     SF_SNaN,         0xff,    USR_FPINVF);
1008
1009    TEST_CMP_RR(sfcmpeq, SF_one,      SF_QNaN,         0x00,    USR_CLEAR);
1010    TEST_CMP_RR(sfcmpeq, SF_one,      SF_SNaN,         0x00,    USR_FPINVF);
1011    TEST_CMP_RR(sfcmpgt, SF_one,      SF_QNaN,         0x00,    USR_CLEAR);
1012    TEST_CMP_RR(sfcmpgt, SF_one,      SF_SNaN,         0x00,    USR_FPINVF);
1013    TEST_CMP_RR(sfcmpge, SF_one,      SF_QNaN,         0x00,    USR_CLEAR);
1014    TEST_CMP_RR(sfcmpge, SF_one,      SF_SNaN,         0x00,    USR_FPINVF);
1015
1016    TEST_P_OP_PP(dfadd,  DF_any,    DF_QNaN,         DF_HEX_NaN,    USR_CLEAR);
1017    TEST_P_OP_PP(dfadd,  DF_any,    DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1018    TEST_P_OP_PP(dfadd,  DF_QNaN,   DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1019    TEST_P_OP_PP(dfadd,  DF_SNaN,   DF_QNaN,         DF_HEX_NaN,    USR_FPINVF);
1020
1021    TEST_P_OP_PP(dfsub,  DF_any,    DF_QNaN,         DF_HEX_NaN,    USR_CLEAR);
1022    TEST_P_OP_PP(dfsub,  DF_any,    DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1023    TEST_P_OP_PP(dfsub,  DF_QNaN,   DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1024    TEST_P_OP_PP(dfsub,  DF_SNaN,   DF_QNaN,         DF_HEX_NaN,    USR_FPINVF);
1025
1026#if CORE_IS_V67
1027    TEST_P_OP_PP(dfmin,  DF_any,    DF_small_neg,    DF_small_neg,  USR_CLEAR);
1028    TEST_P_OP_PP(dfmin,  DF_any,    DF_SNaN,         DF_any,        USR_FPINVF);
1029    TEST_P_OP_PP(dfmin,  DF_SNaN,   DF_any,          DF_any,        USR_FPINVF);
1030    TEST_P_OP_PP(dfmin,  DF_any,    DF_QNaN,         DF_any,        USR_CLEAR);
1031    TEST_P_OP_PP(dfmin,  DF_QNaN,   DF_any,          DF_any,        USR_CLEAR);
1032    TEST_P_OP_PP(dfmin,  DF_SNaN,   DF_QNaN,         DF_HEX_NaN,    USR_FPINVF);
1033    TEST_P_OP_PP(dfmin,  DF_QNaN,   DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1034    TEST_P_OP_PP(dfmin,  DF_zero,   DF_zero_neg,     DF_zero_neg,   USR_CLEAR);
1035    TEST_P_OP_PP(dfmin,  DF_zero_neg, DF_zero,       DF_zero_neg,   USR_CLEAR);
1036
1037    TEST_P_OP_PP(dfmax,  DF_any,    DF_small_neg,    DF_any,        USR_CLEAR);
1038    TEST_P_OP_PP(dfmax,  DF_any,    DF_SNaN,         DF_any,        USR_FPINVF);
1039    TEST_P_OP_PP(dfmax,  DF_SNaN,   DF_any,          DF_any,        USR_FPINVF);
1040    TEST_P_OP_PP(dfmax,  DF_any,    DF_QNaN,         DF_any,        USR_CLEAR);
1041    TEST_P_OP_PP(dfmax,  DF_QNaN,   DF_any,          DF_any,        USR_CLEAR);
1042    TEST_P_OP_PP(dfmax,  DF_SNaN,   DF_QNaN,         DF_HEX_NaN,    USR_FPINVF);
1043    TEST_P_OP_PP(dfmax,  DF_QNaN,   DF_SNaN,         DF_HEX_NaN,    USR_FPINVF);
1044    TEST_P_OP_PP(dfmax,  DF_zero,   DF_zero_neg,     DF_zero,       USR_CLEAR);
1045    TEST_P_OP_PP(dfmax,  DF_zero_neg, DF_zero,       DF_zero,       USR_CLEAR);
1046
1047    TEST_XP_OP_PP(dfmpyhh, DF_one,   DF_one,  DF_one,   DF_one_hh,  USR_CLEAR);
1048    TEST_XP_OP_PP(dfmpyhh, DF_zero,  DF_any,  DF_QNaN,  DF_HEX_NaN, USR_CLEAR);
1049    TEST_XP_OP_PP(dfmpyhh, DF_zero,  DF_any,  DF_SNaN,  DF_HEX_NaN, USR_FPINVF);
1050    TEST_XP_OP_PP(dfmpyhh, DF_zero,  DF_QNaN, DF_SNaN,  DF_HEX_NaN, USR_FPINVF);
1051    TEST_XP_OP_PP(dfmpyhh, DF_zero,  DF_SNaN, DF_QNaN,  DF_HEX_NaN, USR_FPINVF);
1052#else
1053    printf("v67 instructions skipped\n");
1054#endif
1055
1056    TEST_CMP_PP(dfcmpuo, DF_small_neg, DF_any,          0x00,    USR_CLEAR);
1057    TEST_CMP_PP(dfcmpuo, DF_large_pos, DF_any,          0x00,    USR_CLEAR);
1058    TEST_CMP_PP(dfcmpuo, DF_QNaN,      DF_any,          0xff,    USR_CLEAR);
1059    TEST_CMP_PP(dfcmpuo, DF_QNaN_neg,  DF_any,          0xff,    USR_CLEAR);
1060    TEST_CMP_PP(dfcmpuo, DF_SNaN,      DF_any,          0xff,    USR_FPINVF);
1061    TEST_CMP_PP(dfcmpuo, DF_SNaN_neg,  DF_any,          0xff,    USR_FPINVF);
1062    TEST_CMP_PP(dfcmpuo, DF_QNaN,      DF_QNaN,         0xff,    USR_CLEAR);
1063    TEST_CMP_PP(dfcmpuo, DF_QNaN,      DF_SNaN,         0xff,    USR_FPINVF);
1064
1065    TEST_CMP_PP(dfcmpeq, DF_any,       DF_QNaN,         0x00,    USR_CLEAR);
1066    TEST_CMP_PP(dfcmpeq, DF_any,       DF_SNaN,         0x00,    USR_FPINVF);
1067    TEST_CMP_PP(dfcmpgt, DF_any,       DF_QNaN,         0x00,    USR_CLEAR);
1068    TEST_CMP_PP(dfcmpgt, DF_any,       DF_SNaN,         0x00,    USR_FPINVF);
1069    TEST_CMP_PP(dfcmpge, DF_any,       DF_QNaN,         0x00,    USR_CLEAR);
1070    TEST_CMP_PP(dfcmpge, DF_any,       DF_SNaN,         0x00,    USR_FPINVF);
1071
1072    TEST_P_OP_R(conv_sf2df,       SF_QNaN,  DF_HEX_NaN,             USR_CLEAR);
1073    TEST_P_OP_R(conv_sf2df,       SF_SNaN,  DF_HEX_NaN,             USR_FPINVF);
1074    TEST_R_OP_R(conv_sf2uw,       SF_QNaN,  0xffffffff,             USR_FPINVF);
1075    TEST_R_OP_R(conv_sf2uw,       SF_SNaN,  0xffffffff,             USR_FPINVF);
1076    TEST_R_OP_R(conv_sf2w,        SF_QNaN,  0xffffffff,             USR_FPINVF);
1077    TEST_R_OP_R(conv_sf2w,        SF_SNaN,  0xffffffff,             USR_FPINVF);
1078    TEST_P_OP_R(conv_sf2ud,       SF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1079    TEST_P_OP_R(conv_sf2ud,       SF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1080    TEST_P_OP_R(conv_sf2d,        SF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1081    TEST_P_OP_R(conv_sf2d,        SF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1082    TEST_R_OP_R(conv_sf2uw_chop,  SF_QNaN,  0xffffffff,             USR_FPINVF);
1083    TEST_R_OP_R(conv_sf2uw_chop,  SF_SNaN,  0xffffffff,             USR_FPINVF);
1084    TEST_R_OP_R(conv_sf2w_chop,   SF_QNaN,  0xffffffff,             USR_FPINVF);
1085    TEST_R_OP_R(conv_sf2w_chop,   SF_SNaN,  0xffffffff,             USR_FPINVF);
1086    TEST_P_OP_R(conv_sf2ud_chop,  SF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1087    TEST_P_OP_R(conv_sf2ud_chop,  SF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1088    TEST_P_OP_R(conv_sf2d_chop,   SF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1089    TEST_P_OP_R(conv_sf2d_chop,   SF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1090
1091    TEST_R_OP_P(conv_df2sf,       DF_QNaN,  SF_HEX_NaN,             USR_CLEAR);
1092    TEST_R_OP_P(conv_df2sf,       DF_SNaN,  SF_HEX_NaN,             USR_FPINVF);
1093    TEST_R_OP_P(conv_df2uw,       DF_QNaN,  0xffffffff,             USR_FPINVF);
1094    TEST_R_OP_P(conv_df2uw,       DF_SNaN,  0xffffffff,             USR_FPINVF);
1095    TEST_R_OP_P(conv_df2w,        DF_QNaN,  0xffffffff,             USR_FPINVF);
1096    TEST_R_OP_P(conv_df2w,        DF_SNaN,  0xffffffff,             USR_FPINVF);
1097    TEST_P_OP_P(conv_df2ud,       DF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1098    TEST_P_OP_P(conv_df2ud,       DF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1099    TEST_P_OP_P(conv_df2d,        DF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1100    TEST_P_OP_P(conv_df2d,        DF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1101    TEST_R_OP_P(conv_df2uw_chop,  DF_QNaN,  0xffffffff,             USR_FPINVF);
1102    TEST_R_OP_P(conv_df2uw_chop,  DF_SNaN,  0xffffffff,             USR_FPINVF);
1103
1104    /* Test for typo in HELPER(conv_df2uw_chop) */
1105    TEST_R_OP_P(conv_df2uw_chop, 0xffffff7f00000001ULL, 0xffffffff, USR_FPINVF);
1106
1107    TEST_R_OP_P(conv_df2w_chop,   DF_QNaN,  0xffffffff,             USR_FPINVF);
1108    TEST_R_OP_P(conv_df2w_chop,   DF_SNaN,  0xffffffff,             USR_FPINVF);
1109    TEST_P_OP_P(conv_df2ud_chop,  DF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1110    TEST_P_OP_P(conv_df2ud_chop,  DF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1111    TEST_P_OP_P(conv_df2d_chop,   DF_QNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1112    TEST_P_OP_P(conv_df2d_chop,   DF_SNaN,  0xffffffffffffffffULL,  USR_FPINVF);
1113
1114    TEST_R_OP_R(conv_uw2sf,    0x00000001,             SF_one,      USR_CLEAR);
1115    TEST_R_OP_R(conv_uw2sf,    0x010020a5,             0x4b801052,  USR_FPINPF);
1116    TEST_R_OP_R(conv_w2sf,     0x00000001,             SF_one,      USR_CLEAR);
1117    TEST_R_OP_R(conv_w2sf,     0x010020a5,             0x4b801052,  USR_FPINPF);
1118    TEST_R_OP_P(conv_ud2sf,    0x0000000000000001ULL,  SF_one,      USR_CLEAR);
1119    TEST_R_OP_P(conv_ud2sf,    0x00000000010020a5ULL,  0x4b801052,  USR_FPINPF);
1120    TEST_R_OP_P(conv_d2sf,     0x0000000000000001ULL,  SF_one,      USR_CLEAR);
1121    TEST_R_OP_P(conv_d2sf,     0x00000000010020a5ULL,  0x4b801052,  USR_FPINPF);
1122
1123    TEST_XR_OP_RRp(sffma_sc, SF_one,   SF_one,    SF_one,   1, SF_four,
1124                   USR_CLEAR);
1125    TEST_XR_OP_RRp(sffma_sc, SF_QNaN,  SF_one,    SF_one,   1, SF_HEX_NaN,
1126                   USR_CLEAR);
1127    TEST_XR_OP_RRp(sffma_sc, SF_one,   SF_QNaN,   SF_one,   1, SF_HEX_NaN,
1128                   USR_CLEAR);
1129    TEST_XR_OP_RRp(sffma_sc, SF_one,   SF_one,    SF_QNaN,  1, SF_HEX_NaN,
1130                   USR_CLEAR);
1131    TEST_XR_OP_RRp(sffma_sc, SF_SNaN,  SF_one,    SF_one,   1, SF_HEX_NaN,
1132                   USR_FPINVF);
1133    TEST_XR_OP_RRp(sffma_sc, SF_one,   SF_SNaN,   SF_one,   1, SF_HEX_NaN,
1134                   USR_FPINVF);
1135    TEST_XR_OP_RRp(sffma_sc, SF_one,   SF_one,    SF_SNaN,  1, SF_HEX_NaN,
1136                   USR_FPINVF);
1137
1138    TEST_Rp_OP_RR(sfrecipa, SF_one,    SF_one,    SF_one_recip,   0x00,
1139                  USR_CLEAR);
1140    TEST_Rp_OP_RR(sfrecipa, SF_QNaN,   SF_one,    SF_HEX_NaN,     0x00,
1141                  USR_CLEAR);
1142    TEST_Rp_OP_RR(sfrecipa, SF_one,    SF_QNaN,   SF_HEX_NaN,     0x00,
1143                  USR_CLEAR);
1144    TEST_Rp_OP_RR(sfrecipa, SF_one,    SF_SNaN,   SF_HEX_NaN,     0x00,
1145                  USR_FPINVF);
1146    TEST_Rp_OP_RR(sfrecipa, SF_SNaN,   SF_one,    SF_HEX_NaN,     0x00,
1147                  USR_FPINVF);
1148
1149    TEST_R_OP_RR(sffixupn, SF_one,     SF_one,    SF_one,       USR_CLEAR);
1150    TEST_R_OP_RR(sffixupn, SF_QNaN,    SF_one,    SF_HEX_NaN,   USR_CLEAR);
1151    TEST_R_OP_RR(sffixupn, SF_one,     SF_QNaN,   SF_HEX_NaN,   USR_CLEAR);
1152    TEST_R_OP_RR(sffixupn, SF_SNaN,    SF_one,    SF_HEX_NaN,   USR_FPINVF);
1153    TEST_R_OP_RR(sffixupn, SF_one,     SF_SNaN,   SF_HEX_NaN,   USR_FPINVF);
1154
1155    TEST_R_OP_RR(sffixupd, SF_one,     SF_one,    SF_one,       USR_CLEAR);
1156    TEST_R_OP_RR(sffixupd, SF_QNaN,    SF_one,    SF_HEX_NaN,   USR_CLEAR);
1157    TEST_R_OP_RR(sffixupd, SF_one,     SF_QNaN,   SF_HEX_NaN,   USR_CLEAR);
1158    TEST_R_OP_RR(sffixupd, SF_SNaN,    SF_one,    SF_HEX_NaN,   USR_FPINVF);
1159    TEST_R_OP_RR(sffixupd, SF_one,     SF_SNaN,   SF_HEX_NaN,   USR_FPINVF);
1160
1161    TEST_R_OP_R(sffixupr, SF_one,             SF_one,           USR_CLEAR);
1162    TEST_R_OP_R(sffixupr, SF_QNaN,            SF_HEX_NaN,       USR_CLEAR);
1163    TEST_R_OP_R(sffixupr, SF_SNaN,            SF_HEX_NaN,       USR_FPINVF);
1164
1165    TEST_Rp_OP_R(sfinvsqrta, SF_one,        SF_one_invsqrta,  0x00, USR_CLEAR);
1166    TEST_Rp_OP_R(sfinvsqrta, SF_zero,       SF_one,           0x00, USR_CLEAR);
1167    TEST_Rp_OP_R(sfinvsqrta, SF_QNaN,       SF_HEX_NaN,       0x00, USR_CLEAR);
1168    TEST_Rp_OP_R(sfinvsqrta, SF_small_neg,  SF_HEX_NaN,       0x00, USR_FPINVF);
1169    TEST_Rp_OP_R(sfinvsqrta, SF_SNaN,       SF_HEX_NaN,       0x00, USR_FPINVF);
1170
1171    puts(err ? "FAIL" : "PASS");
1172    return err;
1173}
1174