qemu/target/mips/tcg/msa_helper.c
<<
>>
Prefs
   1/*
   2 * MIPS SIMD Architecture Module Instruction emulation helpers for QEMU.
   3 *
   4 * Copyright (c) 2014 Imagination Technologies
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "cpu.h"
  22#include "internal.h"
  23#include "tcg/tcg.h"
  24#include "exec/exec-all.h"
  25#include "exec/helper-proto.h"
  26#include "exec/memop.h"
  27#include "fpu/softfloat.h"
  28#include "fpu_helper.h"
  29
  30/* Data format min and max values */
  31#define DF_BITS(df) (1 << ((df) + 3))
  32
  33#define DF_MAX_INT(df)  (int64_t)((1LL << (DF_BITS(df) - 1)) - 1)
  34#define M_MAX_INT(m)    (int64_t)((1LL << ((m)         - 1)) - 1)
  35
  36#define DF_MIN_INT(df)  (int64_t)(-(1LL << (DF_BITS(df) - 1)))
  37#define M_MIN_INT(m)    (int64_t)(-(1LL << ((m)         - 1)))
  38
  39#define DF_MAX_UINT(df) (uint64_t)(-1ULL >> (64 - DF_BITS(df)))
  40#define M_MAX_UINT(m)   (uint64_t)(-1ULL >> (64 - (m)))
  41
  42#define UNSIGNED(x, df) ((x) & DF_MAX_UINT(df))
  43#define SIGNED(x, df)                                                   \
  44    ((((int64_t)x) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)))
  45
  46/* Element-by-element access macros */
  47#define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
  48
  49
  50
  51/*
  52 * Bit Count
  53 * ---------
  54 *
  55 * +---------------+----------------------------------------------------------+
  56 * | NLOC.B        | Vector Leading Ones Count (byte)                         |
  57 * | NLOC.H        | Vector Leading Ones Count (halfword)                     |
  58 * | NLOC.W        | Vector Leading Ones Count (word)                         |
  59 * | NLOC.D        | Vector Leading Ones Count (doubleword)                   |
  60 * | NLZC.B        | Vector Leading Zeros Count (byte)                        |
  61 * | NLZC.H        | Vector Leading Zeros Count (halfword)                    |
  62 * | NLZC.W        | Vector Leading Zeros Count (word)                        |
  63 * | NLZC.D        | Vector Leading Zeros Count (doubleword)                  |
  64 * | PCNT.B        | Vector Population Count (byte)                           |
  65 * | PCNT.H        | Vector Population Count (halfword)                       |
  66 * | PCNT.W        | Vector Population Count (word)                           |
  67 * | PCNT.D        | Vector Population Count (doubleword)                     |
  68 * +---------------+----------------------------------------------------------+
  69 */
  70
  71static inline int64_t msa_nlzc_df(uint32_t df, int64_t arg)
  72{
  73    uint64_t x, y;
  74    int n, c;
  75
  76    x = UNSIGNED(arg, df);
  77    n = DF_BITS(df);
  78    c = DF_BITS(df) / 2;
  79
  80    do {
  81        y = x >> c;
  82        if (y != 0) {
  83            n = n - c;
  84            x = y;
  85        }
  86        c = c >> 1;
  87    } while (c != 0);
  88
  89    return n - x;
  90}
  91
  92static inline int64_t msa_nloc_df(uint32_t df, int64_t arg)
  93{
  94    return msa_nlzc_df(df, UNSIGNED((~arg), df));
  95}
  96
  97void helper_msa_nloc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
  98{
  99    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 100    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 101
 102    pwd->b[0]  = msa_nloc_df(DF_BYTE, pws->b[0]);
 103    pwd->b[1]  = msa_nloc_df(DF_BYTE, pws->b[1]);
 104    pwd->b[2]  = msa_nloc_df(DF_BYTE, pws->b[2]);
 105    pwd->b[3]  = msa_nloc_df(DF_BYTE, pws->b[3]);
 106    pwd->b[4]  = msa_nloc_df(DF_BYTE, pws->b[4]);
 107    pwd->b[5]  = msa_nloc_df(DF_BYTE, pws->b[5]);
 108    pwd->b[6]  = msa_nloc_df(DF_BYTE, pws->b[6]);
 109    pwd->b[7]  = msa_nloc_df(DF_BYTE, pws->b[7]);
 110    pwd->b[8]  = msa_nloc_df(DF_BYTE, pws->b[8]);
 111    pwd->b[9]  = msa_nloc_df(DF_BYTE, pws->b[9]);
 112    pwd->b[10] = msa_nloc_df(DF_BYTE, pws->b[10]);
 113    pwd->b[11] = msa_nloc_df(DF_BYTE, pws->b[11]);
 114    pwd->b[12] = msa_nloc_df(DF_BYTE, pws->b[12]);
 115    pwd->b[13] = msa_nloc_df(DF_BYTE, pws->b[13]);
 116    pwd->b[14] = msa_nloc_df(DF_BYTE, pws->b[14]);
 117    pwd->b[15] = msa_nloc_df(DF_BYTE, pws->b[15]);
 118}
 119
 120void helper_msa_nloc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
 121{
 122    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 123    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 124
 125    pwd->h[0]  = msa_nloc_df(DF_HALF, pws->h[0]);
 126    pwd->h[1]  = msa_nloc_df(DF_HALF, pws->h[1]);
 127    pwd->h[2]  = msa_nloc_df(DF_HALF, pws->h[2]);
 128    pwd->h[3]  = msa_nloc_df(DF_HALF, pws->h[3]);
 129    pwd->h[4]  = msa_nloc_df(DF_HALF, pws->h[4]);
 130    pwd->h[5]  = msa_nloc_df(DF_HALF, pws->h[5]);
 131    pwd->h[6]  = msa_nloc_df(DF_HALF, pws->h[6]);
 132    pwd->h[7]  = msa_nloc_df(DF_HALF, pws->h[7]);
 133}
 134
 135void helper_msa_nloc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
 136{
 137    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 138    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 139
 140    pwd->w[0]  = msa_nloc_df(DF_WORD, pws->w[0]);
 141    pwd->w[1]  = msa_nloc_df(DF_WORD, pws->w[1]);
 142    pwd->w[2]  = msa_nloc_df(DF_WORD, pws->w[2]);
 143    pwd->w[3]  = msa_nloc_df(DF_WORD, pws->w[3]);
 144}
 145
 146void helper_msa_nloc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
 147{
 148    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 149    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 150
 151    pwd->d[0]  = msa_nloc_df(DF_DOUBLE, pws->d[0]);
 152    pwd->d[1]  = msa_nloc_df(DF_DOUBLE, pws->d[1]);
 153}
 154
 155void helper_msa_nlzc_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
 156{
 157    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 158    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 159
 160    pwd->b[0]  = msa_nlzc_df(DF_BYTE, pws->b[0]);
 161    pwd->b[1]  = msa_nlzc_df(DF_BYTE, pws->b[1]);
 162    pwd->b[2]  = msa_nlzc_df(DF_BYTE, pws->b[2]);
 163    pwd->b[3]  = msa_nlzc_df(DF_BYTE, pws->b[3]);
 164    pwd->b[4]  = msa_nlzc_df(DF_BYTE, pws->b[4]);
 165    pwd->b[5]  = msa_nlzc_df(DF_BYTE, pws->b[5]);
 166    pwd->b[6]  = msa_nlzc_df(DF_BYTE, pws->b[6]);
 167    pwd->b[7]  = msa_nlzc_df(DF_BYTE, pws->b[7]);
 168    pwd->b[8]  = msa_nlzc_df(DF_BYTE, pws->b[8]);
 169    pwd->b[9]  = msa_nlzc_df(DF_BYTE, pws->b[9]);
 170    pwd->b[10] = msa_nlzc_df(DF_BYTE, pws->b[10]);
 171    pwd->b[11] = msa_nlzc_df(DF_BYTE, pws->b[11]);
 172    pwd->b[12] = msa_nlzc_df(DF_BYTE, pws->b[12]);
 173    pwd->b[13] = msa_nlzc_df(DF_BYTE, pws->b[13]);
 174    pwd->b[14] = msa_nlzc_df(DF_BYTE, pws->b[14]);
 175    pwd->b[15] = msa_nlzc_df(DF_BYTE, pws->b[15]);
 176}
 177
 178void helper_msa_nlzc_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
 179{
 180    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 181    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 182
 183    pwd->h[0]  = msa_nlzc_df(DF_HALF, pws->h[0]);
 184    pwd->h[1]  = msa_nlzc_df(DF_HALF, pws->h[1]);
 185    pwd->h[2]  = msa_nlzc_df(DF_HALF, pws->h[2]);
 186    pwd->h[3]  = msa_nlzc_df(DF_HALF, pws->h[3]);
 187    pwd->h[4]  = msa_nlzc_df(DF_HALF, pws->h[4]);
 188    pwd->h[5]  = msa_nlzc_df(DF_HALF, pws->h[5]);
 189    pwd->h[6]  = msa_nlzc_df(DF_HALF, pws->h[6]);
 190    pwd->h[7]  = msa_nlzc_df(DF_HALF, pws->h[7]);
 191}
 192
 193void helper_msa_nlzc_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
 194{
 195    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 196    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 197
 198    pwd->w[0]  = msa_nlzc_df(DF_WORD, pws->w[0]);
 199    pwd->w[1]  = msa_nlzc_df(DF_WORD, pws->w[1]);
 200    pwd->w[2]  = msa_nlzc_df(DF_WORD, pws->w[2]);
 201    pwd->w[3]  = msa_nlzc_df(DF_WORD, pws->w[3]);
 202}
 203
 204void helper_msa_nlzc_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
 205{
 206    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 207    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 208
 209    pwd->d[0]  = msa_nlzc_df(DF_DOUBLE, pws->d[0]);
 210    pwd->d[1]  = msa_nlzc_df(DF_DOUBLE, pws->d[1]);
 211}
 212
 213static inline int64_t msa_pcnt_df(uint32_t df, int64_t arg)
 214{
 215    uint64_t x;
 216
 217    x = UNSIGNED(arg, df);
 218
 219    x = (x & 0x5555555555555555ULL) + ((x >>  1) & 0x5555555555555555ULL);
 220    x = (x & 0x3333333333333333ULL) + ((x >>  2) & 0x3333333333333333ULL);
 221    x = (x & 0x0F0F0F0F0F0F0F0FULL) + ((x >>  4) & 0x0F0F0F0F0F0F0F0FULL);
 222    x = (x & 0x00FF00FF00FF00FFULL) + ((x >>  8) & 0x00FF00FF00FF00FFULL);
 223    x = (x & 0x0000FFFF0000FFFFULL) + ((x >> 16) & 0x0000FFFF0000FFFFULL);
 224    x = (x & 0x00000000FFFFFFFFULL) + ((x >> 32));
 225
 226    return x;
 227}
 228
 229void helper_msa_pcnt_b(CPUMIPSState *env, uint32_t wd, uint32_t ws)
 230{
 231    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 232    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 233
 234    pwd->b[0]  = msa_pcnt_df(DF_BYTE, pws->b[0]);
 235    pwd->b[1]  = msa_pcnt_df(DF_BYTE, pws->b[1]);
 236    pwd->b[2]  = msa_pcnt_df(DF_BYTE, pws->b[2]);
 237    pwd->b[3]  = msa_pcnt_df(DF_BYTE, pws->b[3]);
 238    pwd->b[4]  = msa_pcnt_df(DF_BYTE, pws->b[4]);
 239    pwd->b[5]  = msa_pcnt_df(DF_BYTE, pws->b[5]);
 240    pwd->b[6]  = msa_pcnt_df(DF_BYTE, pws->b[6]);
 241    pwd->b[7]  = msa_pcnt_df(DF_BYTE, pws->b[7]);
 242    pwd->b[8]  = msa_pcnt_df(DF_BYTE, pws->b[8]);
 243    pwd->b[9]  = msa_pcnt_df(DF_BYTE, pws->b[9]);
 244    pwd->b[10] = msa_pcnt_df(DF_BYTE, pws->b[10]);
 245    pwd->b[11] = msa_pcnt_df(DF_BYTE, pws->b[11]);
 246    pwd->b[12] = msa_pcnt_df(DF_BYTE, pws->b[12]);
 247    pwd->b[13] = msa_pcnt_df(DF_BYTE, pws->b[13]);
 248    pwd->b[14] = msa_pcnt_df(DF_BYTE, pws->b[14]);
 249    pwd->b[15] = msa_pcnt_df(DF_BYTE, pws->b[15]);
 250}
 251
 252void helper_msa_pcnt_h(CPUMIPSState *env, uint32_t wd, uint32_t ws)
 253{
 254    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 255    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 256
 257    pwd->h[0]  = msa_pcnt_df(DF_HALF, pws->h[0]);
 258    pwd->h[1]  = msa_pcnt_df(DF_HALF, pws->h[1]);
 259    pwd->h[2]  = msa_pcnt_df(DF_HALF, pws->h[2]);
 260    pwd->h[3]  = msa_pcnt_df(DF_HALF, pws->h[3]);
 261    pwd->h[4]  = msa_pcnt_df(DF_HALF, pws->h[4]);
 262    pwd->h[5]  = msa_pcnt_df(DF_HALF, pws->h[5]);
 263    pwd->h[6]  = msa_pcnt_df(DF_HALF, pws->h[6]);
 264    pwd->h[7]  = msa_pcnt_df(DF_HALF, pws->h[7]);
 265}
 266
 267void helper_msa_pcnt_w(CPUMIPSState *env, uint32_t wd, uint32_t ws)
 268{
 269    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 270    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 271
 272    pwd->w[0]  = msa_pcnt_df(DF_WORD, pws->w[0]);
 273    pwd->w[1]  = msa_pcnt_df(DF_WORD, pws->w[1]);
 274    pwd->w[2]  = msa_pcnt_df(DF_WORD, pws->w[2]);
 275    pwd->w[3]  = msa_pcnt_df(DF_WORD, pws->w[3]);
 276}
 277
 278void helper_msa_pcnt_d(CPUMIPSState *env, uint32_t wd, uint32_t ws)
 279{
 280    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 281    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 282
 283    pwd->d[0]  = msa_pcnt_df(DF_DOUBLE, pws->d[0]);
 284    pwd->d[1]  = msa_pcnt_df(DF_DOUBLE, pws->d[1]);
 285}
 286
 287
 288/*
 289 * Bit Move
 290 * --------
 291 *
 292 * +---------------+----------------------------------------------------------+
 293 * | BINSL.B       | Vector Bit Insert Left (byte)                            |
 294 * | BINSL.H       | Vector Bit Insert Left (halfword)                        |
 295 * | BINSL.W       | Vector Bit Insert Left (word)                            |
 296 * | BINSL.D       | Vector Bit Insert Left (doubleword)                      |
 297 * | BINSR.B       | Vector Bit Insert Right (byte)                           |
 298 * | BINSR.H       | Vector Bit Insert Right (halfword)                       |
 299 * | BINSR.W       | Vector Bit Insert Right (word)                           |
 300 * | BINSR.D       | Vector Bit Insert Right (doubleword)                     |
 301 * | BMNZ.V        | Vector Bit Move If Not Zero                              |
 302 * | BMZ.V         | Vector Bit Move If Zero                                  |
 303 * | BSEL.V        | Vector Bit Select                                        |
 304 * +---------------+----------------------------------------------------------+
 305 */
 306
 307/* Data format bit position and unsigned values */
 308#define BIT_POSITION(x, df) ((uint64_t)(x) % DF_BITS(df))
 309
 310static inline int64_t msa_binsl_df(uint32_t df,
 311                                   int64_t dest, int64_t arg1, int64_t arg2)
 312{
 313    uint64_t u_arg1 = UNSIGNED(arg1, df);
 314    uint64_t u_dest = UNSIGNED(dest, df);
 315    int32_t sh_d = BIT_POSITION(arg2, df) + 1;
 316    int32_t sh_a = DF_BITS(df) - sh_d;
 317    if (sh_d == DF_BITS(df)) {
 318        return u_arg1;
 319    } else {
 320        return UNSIGNED(UNSIGNED(u_dest << sh_d, df) >> sh_d, df) |
 321               UNSIGNED(UNSIGNED(u_arg1 >> sh_a, df) << sh_a, df);
 322    }
 323}
 324
 325void helper_msa_binsl_b(CPUMIPSState *env,
 326                        uint32_t wd, uint32_t ws, uint32_t wt)
 327{
 328    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 329    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 330    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 331
 332    pwd->b[0]  = msa_binsl_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
 333    pwd->b[1]  = msa_binsl_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
 334    pwd->b[2]  = msa_binsl_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
 335    pwd->b[3]  = msa_binsl_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
 336    pwd->b[4]  = msa_binsl_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
 337    pwd->b[5]  = msa_binsl_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
 338    pwd->b[6]  = msa_binsl_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
 339    pwd->b[7]  = msa_binsl_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
 340    pwd->b[8]  = msa_binsl_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
 341    pwd->b[9]  = msa_binsl_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
 342    pwd->b[10] = msa_binsl_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
 343    pwd->b[11] = msa_binsl_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
 344    pwd->b[12] = msa_binsl_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
 345    pwd->b[13] = msa_binsl_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
 346    pwd->b[14] = msa_binsl_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
 347    pwd->b[15] = msa_binsl_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
 348}
 349
 350void helper_msa_binsl_h(CPUMIPSState *env,
 351                        uint32_t wd, uint32_t ws, uint32_t wt)
 352{
 353    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 354    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 355    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 356
 357    pwd->h[0]  = msa_binsl_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
 358    pwd->h[1]  = msa_binsl_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
 359    pwd->h[2]  = msa_binsl_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
 360    pwd->h[3]  = msa_binsl_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
 361    pwd->h[4]  = msa_binsl_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
 362    pwd->h[5]  = msa_binsl_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
 363    pwd->h[6]  = msa_binsl_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
 364    pwd->h[7]  = msa_binsl_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
 365}
 366
 367void helper_msa_binsl_w(CPUMIPSState *env,
 368                        uint32_t wd, uint32_t ws, uint32_t wt)
 369{
 370    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 371    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 372    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 373
 374    pwd->w[0]  = msa_binsl_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
 375    pwd->w[1]  = msa_binsl_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
 376    pwd->w[2]  = msa_binsl_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
 377    pwd->w[3]  = msa_binsl_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
 378}
 379
 380void helper_msa_binsl_d(CPUMIPSState *env,
 381                        uint32_t wd, uint32_t ws, uint32_t wt)
 382{
 383    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 384    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 385    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 386
 387    pwd->d[0]  = msa_binsl_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
 388    pwd->d[1]  = msa_binsl_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
 389}
 390
 391static inline int64_t msa_binsr_df(uint32_t df,
 392                                   int64_t dest, int64_t arg1, int64_t arg2)
 393{
 394    uint64_t u_arg1 = UNSIGNED(arg1, df);
 395    uint64_t u_dest = UNSIGNED(dest, df);
 396    int32_t sh_d = BIT_POSITION(arg2, df) + 1;
 397    int32_t sh_a = DF_BITS(df) - sh_d;
 398    if (sh_d == DF_BITS(df)) {
 399        return u_arg1;
 400    } else {
 401        return UNSIGNED(UNSIGNED(u_dest >> sh_d, df) << sh_d, df) |
 402               UNSIGNED(UNSIGNED(u_arg1 << sh_a, df) >> sh_a, df);
 403    }
 404}
 405
 406void helper_msa_binsr_b(CPUMIPSState *env,
 407                        uint32_t wd, uint32_t ws, uint32_t wt)
 408{
 409    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 410    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 411    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 412
 413    pwd->b[0]  = msa_binsr_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
 414    pwd->b[1]  = msa_binsr_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
 415    pwd->b[2]  = msa_binsr_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
 416    pwd->b[3]  = msa_binsr_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
 417    pwd->b[4]  = msa_binsr_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
 418    pwd->b[5]  = msa_binsr_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
 419    pwd->b[6]  = msa_binsr_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
 420    pwd->b[7]  = msa_binsr_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
 421    pwd->b[8]  = msa_binsr_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
 422    pwd->b[9]  = msa_binsr_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
 423    pwd->b[10] = msa_binsr_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
 424    pwd->b[11] = msa_binsr_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
 425    pwd->b[12] = msa_binsr_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
 426    pwd->b[13] = msa_binsr_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
 427    pwd->b[14] = msa_binsr_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
 428    pwd->b[15] = msa_binsr_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
 429}
 430
 431void helper_msa_binsr_h(CPUMIPSState *env,
 432                        uint32_t wd, uint32_t ws, uint32_t wt)
 433{
 434    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 435    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 436    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 437
 438    pwd->h[0]  = msa_binsr_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
 439    pwd->h[1]  = msa_binsr_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
 440    pwd->h[2]  = msa_binsr_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
 441    pwd->h[3]  = msa_binsr_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
 442    pwd->h[4]  = msa_binsr_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
 443    pwd->h[5]  = msa_binsr_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
 444    pwd->h[6]  = msa_binsr_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
 445    pwd->h[7]  = msa_binsr_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
 446}
 447
 448void helper_msa_binsr_w(CPUMIPSState *env,
 449                        uint32_t wd, uint32_t ws, uint32_t wt)
 450{
 451    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 452    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 453    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 454
 455    pwd->w[0]  = msa_binsr_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
 456    pwd->w[1]  = msa_binsr_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
 457    pwd->w[2]  = msa_binsr_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
 458    pwd->w[3]  = msa_binsr_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
 459}
 460
 461void helper_msa_binsr_d(CPUMIPSState *env,
 462                        uint32_t wd, uint32_t ws, uint32_t wt)
 463{
 464    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 465    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 466    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 467
 468    pwd->d[0]  = msa_binsr_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
 469    pwd->d[1]  = msa_binsr_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
 470}
 471
 472void helper_msa_bmnz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
 473{
 474    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 475    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 476    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 477
 478    pwd->d[0] = UNSIGNED(                                                     \
 479        ((pwd->d[0] & (~pwt->d[0])) | (pws->d[0] & pwt->d[0])), DF_DOUBLE);
 480    pwd->d[1] = UNSIGNED(                                                     \
 481        ((pwd->d[1] & (~pwt->d[1])) | (pws->d[1] & pwt->d[1])), DF_DOUBLE);
 482}
 483
 484void helper_msa_bmz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
 485{
 486    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 487    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 488    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 489
 490    pwd->d[0] = UNSIGNED(                                                     \
 491        ((pwd->d[0] & pwt->d[0]) | (pws->d[0] & (~pwt->d[0]))), DF_DOUBLE);
 492    pwd->d[1] = UNSIGNED(                                                     \
 493        ((pwd->d[1] & pwt->d[1]) | (pws->d[1] & (~pwt->d[1]))), DF_DOUBLE);
 494}
 495
 496void helper_msa_bsel_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
 497{
 498    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 499    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 500    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 501
 502    pwd->d[0] = UNSIGNED(                                                     \
 503        (pws->d[0] & (~pwd->d[0])) | (pwt->d[0] & pwd->d[0]), DF_DOUBLE);
 504    pwd->d[1] = UNSIGNED(                                                     \
 505        (pws->d[1] & (~pwd->d[1])) | (pwt->d[1] & pwd->d[1]), DF_DOUBLE);
 506}
 507
 508
 509/*
 510 * Bit Set
 511 * -------
 512 *
 513 * +---------------+----------------------------------------------------------+
 514 * | BCLR.B        | Vector Bit Clear (byte)                                  |
 515 * | BCLR.H        | Vector Bit Clear (halfword)                              |
 516 * | BCLR.W        | Vector Bit Clear (word)                                  |
 517 * | BCLR.D        | Vector Bit Clear (doubleword)                            |
 518 * | BNEG.B        | Vector Bit Negate (byte)                                 |
 519 * | BNEG.H        | Vector Bit Negate (halfword)                             |
 520 * | BNEG.W        | Vector Bit Negate (word)                                 |
 521 * | BNEG.D        | Vector Bit Negate (doubleword)                           |
 522 * | BSET.B        | Vector Bit Set (byte)                                    |
 523 * | BSET.H        | Vector Bit Set (halfword)                                |
 524 * | BSET.W        | Vector Bit Set (word)                                    |
 525 * | BSET.D        | Vector Bit Set (doubleword)                              |
 526 * +---------------+----------------------------------------------------------+
 527 */
 528
 529static inline int64_t msa_bclr_df(uint32_t df, int64_t arg1, int64_t arg2)
 530{
 531    int32_t b_arg2 = BIT_POSITION(arg2, df);
 532    return UNSIGNED(arg1 & (~(1LL << b_arg2)), df);
 533}
 534
 535void helper_msa_bclr_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
 536{
 537    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 538    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 539    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 540
 541    pwd->b[0]  = msa_bclr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
 542    pwd->b[1]  = msa_bclr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
 543    pwd->b[2]  = msa_bclr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
 544    pwd->b[3]  = msa_bclr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
 545    pwd->b[4]  = msa_bclr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
 546    pwd->b[5]  = msa_bclr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
 547    pwd->b[6]  = msa_bclr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
 548    pwd->b[7]  = msa_bclr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
 549    pwd->b[8]  = msa_bclr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
 550    pwd->b[9]  = msa_bclr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
 551    pwd->b[10] = msa_bclr_df(DF_BYTE, pws->b[10], pwt->b[10]);
 552    pwd->b[11] = msa_bclr_df(DF_BYTE, pws->b[11], pwt->b[11]);
 553    pwd->b[12] = msa_bclr_df(DF_BYTE, pws->b[12], pwt->b[12]);
 554    pwd->b[13] = msa_bclr_df(DF_BYTE, pws->b[13], pwt->b[13]);
 555    pwd->b[14] = msa_bclr_df(DF_BYTE, pws->b[14], pwt->b[14]);
 556    pwd->b[15] = msa_bclr_df(DF_BYTE, pws->b[15], pwt->b[15]);
 557}
 558
 559void helper_msa_bclr_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
 560{
 561    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 562    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 563    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 564
 565    pwd->h[0]  = msa_bclr_df(DF_HALF, pws->h[0],  pwt->h[0]);
 566    pwd->h[1]  = msa_bclr_df(DF_HALF, pws->h[1],  pwt->h[1]);
 567    pwd->h[2]  = msa_bclr_df(DF_HALF, pws->h[2],  pwt->h[2]);
 568    pwd->h[3]  = msa_bclr_df(DF_HALF, pws->h[3],  pwt->h[3]);
 569    pwd->h[4]  = msa_bclr_df(DF_HALF, pws->h[4],  pwt->h[4]);
 570    pwd->h[5]  = msa_bclr_df(DF_HALF, pws->h[5],  pwt->h[5]);
 571    pwd->h[6]  = msa_bclr_df(DF_HALF, pws->h[6],  pwt->h[6]);
 572    pwd->h[7]  = msa_bclr_df(DF_HALF, pws->h[7],  pwt->h[7]);
 573}
 574
 575void helper_msa_bclr_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
 576{
 577    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 578    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 579    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 580
 581    pwd->w[0]  = msa_bclr_df(DF_WORD, pws->w[0],  pwt->w[0]);
 582    pwd->w[1]  = msa_bclr_df(DF_WORD, pws->w[1],  pwt->w[1]);
 583    pwd->w[2]  = msa_bclr_df(DF_WORD, pws->w[2],  pwt->w[2]);
 584    pwd->w[3]  = msa_bclr_df(DF_WORD, pws->w[3],  pwt->w[3]);
 585}
 586
 587void helper_msa_bclr_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
 588{
 589    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 590    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 591    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 592
 593    pwd->d[0]  = msa_bclr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
 594    pwd->d[1]  = msa_bclr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
 595}
 596
 597static inline int64_t msa_bneg_df(uint32_t df, int64_t arg1, int64_t arg2)
 598{
 599    int32_t b_arg2 = BIT_POSITION(arg2, df);
 600    return UNSIGNED(arg1 ^ (1LL << b_arg2), df);
 601}
 602
 603void helper_msa_bneg_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
 604{
 605    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 606    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 607    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 608
 609    pwd->b[0]  = msa_bneg_df(DF_BYTE, pws->b[0],  pwt->b[0]);
 610    pwd->b[1]  = msa_bneg_df(DF_BYTE, pws->b[1],  pwt->b[1]);
 611    pwd->b[2]  = msa_bneg_df(DF_BYTE, pws->b[2],  pwt->b[2]);
 612    pwd->b[3]  = msa_bneg_df(DF_BYTE, pws->b[3],  pwt->b[3]);
 613    pwd->b[4]  = msa_bneg_df(DF_BYTE, pws->b[4],  pwt->b[4]);
 614    pwd->b[5]  = msa_bneg_df(DF_BYTE, pws->b[5],  pwt->b[5]);
 615    pwd->b[6]  = msa_bneg_df(DF_BYTE, pws->b[6],  pwt->b[6]);
 616    pwd->b[7]  = msa_bneg_df(DF_BYTE, pws->b[7],  pwt->b[7]);
 617    pwd->b[8]  = msa_bneg_df(DF_BYTE, pws->b[8],  pwt->b[8]);
 618    pwd->b[9]  = msa_bneg_df(DF_BYTE, pws->b[9],  pwt->b[9]);
 619    pwd->b[10] = msa_bneg_df(DF_BYTE, pws->b[10], pwt->b[10]);
 620    pwd->b[11] = msa_bneg_df(DF_BYTE, pws->b[11], pwt->b[11]);
 621    pwd->b[12] = msa_bneg_df(DF_BYTE, pws->b[12], pwt->b[12]);
 622    pwd->b[13] = msa_bneg_df(DF_BYTE, pws->b[13], pwt->b[13]);
 623    pwd->b[14] = msa_bneg_df(DF_BYTE, pws->b[14], pwt->b[14]);
 624    pwd->b[15] = msa_bneg_df(DF_BYTE, pws->b[15], pwt->b[15]);
 625}
 626
 627void helper_msa_bneg_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
 628{
 629    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 630    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 631    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 632
 633    pwd->h[0]  = msa_bneg_df(DF_HALF, pws->h[0],  pwt->h[0]);
 634    pwd->h[1]  = msa_bneg_df(DF_HALF, pws->h[1],  pwt->h[1]);
 635    pwd->h[2]  = msa_bneg_df(DF_HALF, pws->h[2],  pwt->h[2]);
 636    pwd->h[3]  = msa_bneg_df(DF_HALF, pws->h[3],  pwt->h[3]);
 637    pwd->h[4]  = msa_bneg_df(DF_HALF, pws->h[4],  pwt->h[4]);
 638    pwd->h[5]  = msa_bneg_df(DF_HALF, pws->h[5],  pwt->h[5]);
 639    pwd->h[6]  = msa_bneg_df(DF_HALF, pws->h[6],  pwt->h[6]);
 640    pwd->h[7]  = msa_bneg_df(DF_HALF, pws->h[7],  pwt->h[7]);
 641}
 642
 643void helper_msa_bneg_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
 644{
 645    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 646    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 647    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 648
 649    pwd->w[0]  = msa_bneg_df(DF_WORD, pws->w[0],  pwt->w[0]);
 650    pwd->w[1]  = msa_bneg_df(DF_WORD, pws->w[1],  pwt->w[1]);
 651    pwd->w[2]  = msa_bneg_df(DF_WORD, pws->w[2],  pwt->w[2]);
 652    pwd->w[3]  = msa_bneg_df(DF_WORD, pws->w[3],  pwt->w[3]);
 653}
 654
 655void helper_msa_bneg_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
 656{
 657    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 658    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 659    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 660
 661    pwd->d[0]  = msa_bneg_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
 662    pwd->d[1]  = msa_bneg_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
 663}
 664
 665static inline int64_t msa_bset_df(uint32_t df, int64_t arg1,
 666        int64_t arg2)
 667{
 668    int32_t b_arg2 = BIT_POSITION(arg2, df);
 669    return UNSIGNED(arg1 | (1LL << b_arg2), df);
 670}
 671
 672void helper_msa_bset_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
 673{
 674    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 675    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 676    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 677
 678    pwd->b[0]  = msa_bset_df(DF_BYTE, pws->b[0],  pwt->b[0]);
 679    pwd->b[1]  = msa_bset_df(DF_BYTE, pws->b[1],  pwt->b[1]);
 680    pwd->b[2]  = msa_bset_df(DF_BYTE, pws->b[2],  pwt->b[2]);
 681    pwd->b[3]  = msa_bset_df(DF_BYTE, pws->b[3],  pwt->b[3]);
 682    pwd->b[4]  = msa_bset_df(DF_BYTE, pws->b[4],  pwt->b[4]);
 683    pwd->b[5]  = msa_bset_df(DF_BYTE, pws->b[5],  pwt->b[5]);
 684    pwd->b[6]  = msa_bset_df(DF_BYTE, pws->b[6],  pwt->b[6]);
 685    pwd->b[7]  = msa_bset_df(DF_BYTE, pws->b[7],  pwt->b[7]);
 686    pwd->b[8]  = msa_bset_df(DF_BYTE, pws->b[8],  pwt->b[8]);
 687    pwd->b[9]  = msa_bset_df(DF_BYTE, pws->b[9],  pwt->b[9]);
 688    pwd->b[10] = msa_bset_df(DF_BYTE, pws->b[10], pwt->b[10]);
 689    pwd->b[11] = msa_bset_df(DF_BYTE, pws->b[11], pwt->b[11]);
 690    pwd->b[12] = msa_bset_df(DF_BYTE, pws->b[12], pwt->b[12]);
 691    pwd->b[13] = msa_bset_df(DF_BYTE, pws->b[13], pwt->b[13]);
 692    pwd->b[14] = msa_bset_df(DF_BYTE, pws->b[14], pwt->b[14]);
 693    pwd->b[15] = msa_bset_df(DF_BYTE, pws->b[15], pwt->b[15]);
 694}
 695
 696void helper_msa_bset_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
 697{
 698    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 699    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 700    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 701
 702    pwd->h[0]  = msa_bset_df(DF_HALF, pws->h[0],  pwt->h[0]);
 703    pwd->h[1]  = msa_bset_df(DF_HALF, pws->h[1],  pwt->h[1]);
 704    pwd->h[2]  = msa_bset_df(DF_HALF, pws->h[2],  pwt->h[2]);
 705    pwd->h[3]  = msa_bset_df(DF_HALF, pws->h[3],  pwt->h[3]);
 706    pwd->h[4]  = msa_bset_df(DF_HALF, pws->h[4],  pwt->h[4]);
 707    pwd->h[5]  = msa_bset_df(DF_HALF, pws->h[5],  pwt->h[5]);
 708    pwd->h[6]  = msa_bset_df(DF_HALF, pws->h[6],  pwt->h[6]);
 709    pwd->h[7]  = msa_bset_df(DF_HALF, pws->h[7],  pwt->h[7]);
 710}
 711
 712void helper_msa_bset_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
 713{
 714    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 715    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 716    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 717
 718    pwd->w[0]  = msa_bset_df(DF_WORD, pws->w[0],  pwt->w[0]);
 719    pwd->w[1]  = msa_bset_df(DF_WORD, pws->w[1],  pwt->w[1]);
 720    pwd->w[2]  = msa_bset_df(DF_WORD, pws->w[2],  pwt->w[2]);
 721    pwd->w[3]  = msa_bset_df(DF_WORD, pws->w[3],  pwt->w[3]);
 722}
 723
 724void helper_msa_bset_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
 725{
 726    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 727    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 728    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 729
 730    pwd->d[0]  = msa_bset_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
 731    pwd->d[1]  = msa_bset_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
 732}
 733
 734
 735/*
 736 * Fixed Multiply
 737 * --------------
 738 *
 739 * +---------------+----------------------------------------------------------+
 740 * | MADD_Q.H      | Vector Fixed-Point Multiply and Add (halfword)           |
 741 * | MADD_Q.W      | Vector Fixed-Point Multiply and Add (word)               |
 742 * | MADDR_Q.H     | Vector Fixed-Point Multiply and Add Rounded (halfword)   |
 743 * | MADDR_Q.W     | Vector Fixed-Point Multiply and Add Rounded (word)       |
 744 * | MSUB_Q.H      | Vector Fixed-Point Multiply and Subtr. (halfword)        |
 745 * | MSUB_Q.W      | Vector Fixed-Point Multiply and Subtr. (word)            |
 746 * | MSUBR_Q.H     | Vector Fixed-Point Multiply and Subtr. Rounded (halfword)|
 747 * | MSUBR_Q.W     | Vector Fixed-Point Multiply and Subtr. Rounded (word)    |
 748 * | MUL_Q.H       | Vector Fixed-Point Multiply (halfword)                   |
 749 * | MUL_Q.W       | Vector Fixed-Point Multiply (word)                       |
 750 * | MULR_Q.H      | Vector Fixed-Point Multiply Rounded (halfword)           |
 751 * | MULR_Q.W      | Vector Fixed-Point Multiply Rounded (word)               |
 752 * +---------------+----------------------------------------------------------+
 753 */
 754
 755/* TODO: insert Fixed Multiply group helpers here */
 756
 757
 758/*
 759 * Float Max Min
 760 * -------------
 761 *
 762 * +---------------+----------------------------------------------------------+
 763 * | FMAX_A.W      | Vector Floating-Point Maximum (Absolute) (word)          |
 764 * | FMAX_A.D      | Vector Floating-Point Maximum (Absolute) (doubleword)    |
 765 * | FMAX.W        | Vector Floating-Point Maximum (word)                     |
 766 * | FMAX.D        | Vector Floating-Point Maximum (doubleword)               |
 767 * | FMIN_A.W      | Vector Floating-Point Minimum (Absolute) (word)          |
 768 * | FMIN_A.D      | Vector Floating-Point Minimum (Absolute) (doubleword)    |
 769 * | FMIN.W        | Vector Floating-Point Minimum (word)                     |
 770 * | FMIN.D        | Vector Floating-Point Minimum (doubleword)               |
 771 * +---------------+----------------------------------------------------------+
 772 */
 773
 774/* TODO: insert Float Max Min group helpers here */
 775
 776
 777/*
 778 * Int Add
 779 * -------
 780 *
 781 * +---------------+----------------------------------------------------------+
 782 * | ADD_A.B       | Vector Add Absolute Values (byte)                        |
 783 * | ADD_A.H       | Vector Add Absolute Values (halfword)                    |
 784 * | ADD_A.W       | Vector Add Absolute Values (word)                        |
 785 * | ADD_A.D       | Vector Add Absolute Values (doubleword)                  |
 786 * | ADDS_A.B      | Vector Signed Saturated Add (of Absolute) (byte)         |
 787 * | ADDS_A.H      | Vector Signed Saturated Add (of Absolute) (halfword)     |
 788 * | ADDS_A.W      | Vector Signed Saturated Add (of Absolute) (word)         |
 789 * | ADDS_A.D      | Vector Signed Saturated Add (of Absolute) (doubleword)   |
 790 * | ADDS_S.B      | Vector Signed Saturated Add (of Signed) (byte)           |
 791 * | ADDS_S.H      | Vector Signed Saturated Add (of Signed) (halfword)       |
 792 * | ADDS_S.W      | Vector Signed Saturated Add (of Signed) (word)           |
 793 * | ADDS_S.D      | Vector Signed Saturated Add (of Signed) (doubleword)     |
 794 * | ADDS_U.B      | Vector Unsigned Saturated Add (of Unsigned) (byte)       |
 795 * | ADDS_U.H      | Vector Unsigned Saturated Add (of Unsigned) (halfword)   |
 796 * | ADDS_U.W      | Vector Unsigned Saturated Add (of Unsigned) (word)       |
 797 * | ADDS_U.D      | Vector Unsigned Saturated Add (of Unsigned) (doubleword) |
 798 * | ADDV.B        | Vector Add (byte)                                        |
 799 * | ADDV.H        | Vector Add (halfword)                                    |
 800 * | ADDV.W        | Vector Add (word)                                        |
 801 * | ADDV.D        | Vector Add (doubleword)                                  |
 802 * | HADD_S.H      | Vector Signed Horizontal Add (halfword)                  |
 803 * | HADD_S.W      | Vector Signed Horizontal Add (word)                      |
 804 * | HADD_S.D      | Vector Signed Horizontal Add (doubleword)                |
 * | HADD_U.H      | Vector Unsigned Horizontal Add (halfword)                |
 * | HADD_U.W      | Vector Unsigned Horizontal Add (word)                    |
 * | HADD_U.D      | Vector Unsigned Horizontal Add (doubleword)              |
 808 * +---------------+----------------------------------------------------------+
 809 */
 810
 811
 812static inline int64_t msa_add_a_df(uint32_t df, int64_t arg1, int64_t arg2)
 813{
 814    uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
 815    uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
 816    return abs_arg1 + abs_arg2;
 817}
 818
 819void helper_msa_add_a_b(CPUMIPSState *env,
 820                        uint32_t wd, uint32_t ws, uint32_t wt)
 821{
 822    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 823    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 824    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 825
 826    pwd->b[0]  = msa_add_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
 827    pwd->b[1]  = msa_add_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
 828    pwd->b[2]  = msa_add_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
 829    pwd->b[3]  = msa_add_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
 830    pwd->b[4]  = msa_add_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
 831    pwd->b[5]  = msa_add_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
 832    pwd->b[6]  = msa_add_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
 833    pwd->b[7]  = msa_add_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
 834    pwd->b[8]  = msa_add_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
 835    pwd->b[9]  = msa_add_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
 836    pwd->b[10] = msa_add_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
 837    pwd->b[11] = msa_add_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
 838    pwd->b[12] = msa_add_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
 839    pwd->b[13] = msa_add_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
 840    pwd->b[14] = msa_add_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
 841    pwd->b[15] = msa_add_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
 842}
 843
 844void helper_msa_add_a_h(CPUMIPSState *env,
 845                        uint32_t wd, uint32_t ws, uint32_t wt)
 846{
 847    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 848    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 849    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 850
 851    pwd->h[0]  = msa_add_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
 852    pwd->h[1]  = msa_add_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
 853    pwd->h[2]  = msa_add_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
 854    pwd->h[3]  = msa_add_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
 855    pwd->h[4]  = msa_add_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
 856    pwd->h[5]  = msa_add_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
 857    pwd->h[6]  = msa_add_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
 858    pwd->h[7]  = msa_add_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
 859}
 860
 861void helper_msa_add_a_w(CPUMIPSState *env,
 862                        uint32_t wd, uint32_t ws, uint32_t wt)
 863{
 864    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 865    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 866    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 867
 868    pwd->w[0]  = msa_add_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
 869    pwd->w[1]  = msa_add_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
 870    pwd->w[2]  = msa_add_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
 871    pwd->w[3]  = msa_add_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
 872}
 873
 874void helper_msa_add_a_d(CPUMIPSState *env,
 875                        uint32_t wd, uint32_t ws, uint32_t wt)
 876{
 877    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 878    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 879    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 880
 881    pwd->d[0]  = msa_add_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
 882    pwd->d[1]  = msa_add_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
 883}
 884
 885
 886static inline int64_t msa_adds_a_df(uint32_t df, int64_t arg1, int64_t arg2)
 887{
 888    uint64_t max_int = (uint64_t)DF_MAX_INT(df);
 889    uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
 890    uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
 891    if (abs_arg1 > max_int || abs_arg2 > max_int) {
 892        return (int64_t)max_int;
 893    } else {
 894        return (abs_arg1 < max_int - abs_arg2) ? abs_arg1 + abs_arg2 : max_int;
 895    }
 896}
 897
 898void helper_msa_adds_a_b(CPUMIPSState *env,
 899                         uint32_t wd, uint32_t ws, uint32_t wt)
 900{
 901    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 902    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 903    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 904
 905    pwd->b[0]  = msa_adds_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
 906    pwd->b[1]  = msa_adds_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
 907    pwd->b[2]  = msa_adds_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
 908    pwd->b[3]  = msa_adds_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
 909    pwd->b[4]  = msa_adds_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
 910    pwd->b[5]  = msa_adds_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
 911    pwd->b[6]  = msa_adds_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
 912    pwd->b[7]  = msa_adds_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
 913    pwd->b[8]  = msa_adds_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
 914    pwd->b[9]  = msa_adds_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
 915    pwd->b[10] = msa_adds_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
 916    pwd->b[11] = msa_adds_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
 917    pwd->b[12] = msa_adds_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
 918    pwd->b[13] = msa_adds_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
 919    pwd->b[14] = msa_adds_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
 920    pwd->b[15] = msa_adds_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
 921}
 922
 923void helper_msa_adds_a_h(CPUMIPSState *env,
 924                         uint32_t wd, uint32_t ws, uint32_t wt)
 925{
 926    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 927    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 928    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 929
 930    pwd->h[0]  = msa_adds_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
 931    pwd->h[1]  = msa_adds_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
 932    pwd->h[2]  = msa_adds_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
 933    pwd->h[3]  = msa_adds_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
 934    pwd->h[4]  = msa_adds_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
 935    pwd->h[5]  = msa_adds_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
 936    pwd->h[6]  = msa_adds_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
 937    pwd->h[7]  = msa_adds_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
 938}
 939
 940void helper_msa_adds_a_w(CPUMIPSState *env,
 941                         uint32_t wd, uint32_t ws, uint32_t wt)
 942{
 943    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 944    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 945    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 946
 947    pwd->w[0]  = msa_adds_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
 948    pwd->w[1]  = msa_adds_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
 949    pwd->w[2]  = msa_adds_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
 950    pwd->w[3]  = msa_adds_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
 951}
 952
 953void helper_msa_adds_a_d(CPUMIPSState *env,
 954                         uint32_t wd, uint32_t ws, uint32_t wt)
 955{
 956    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 957    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 958    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 959
 960    pwd->d[0]  = msa_adds_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
 961    pwd->d[1]  = msa_adds_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
 962}
 963
 964
 965static inline int64_t msa_adds_s_df(uint32_t df, int64_t arg1, int64_t arg2)
 966{
 967    int64_t max_int = DF_MAX_INT(df);
 968    int64_t min_int = DF_MIN_INT(df);
 969    if (arg1 < 0) {
 970        return (min_int - arg1 < arg2) ? arg1 + arg2 : min_int;
 971    } else {
 972        return (arg2 < max_int - arg1) ? arg1 + arg2 : max_int;
 973    }
 974}
 975
 976void helper_msa_adds_s_b(CPUMIPSState *env,
 977                         uint32_t wd, uint32_t ws, uint32_t wt)
 978{
 979    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
 980    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
 981    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
 982
 983    pwd->b[0]  = msa_adds_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
 984    pwd->b[1]  = msa_adds_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
 985    pwd->b[2]  = msa_adds_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
 986    pwd->b[3]  = msa_adds_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
 987    pwd->b[4]  = msa_adds_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
 988    pwd->b[5]  = msa_adds_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
 989    pwd->b[6]  = msa_adds_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
 990    pwd->b[7]  = msa_adds_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
 991    pwd->b[8]  = msa_adds_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
 992    pwd->b[9]  = msa_adds_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
 993    pwd->b[10] = msa_adds_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
 994    pwd->b[11] = msa_adds_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
 995    pwd->b[12] = msa_adds_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
 996    pwd->b[13] = msa_adds_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
 997    pwd->b[14] = msa_adds_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
 998    pwd->b[15] = msa_adds_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
 999}
1000
1001void helper_msa_adds_s_h(CPUMIPSState *env,
1002                         uint32_t wd, uint32_t ws, uint32_t wt)
1003{
1004    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1005    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1006    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1007
1008    pwd->h[0]  = msa_adds_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1009    pwd->h[1]  = msa_adds_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1010    pwd->h[2]  = msa_adds_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1011    pwd->h[3]  = msa_adds_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1012    pwd->h[4]  = msa_adds_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1013    pwd->h[5]  = msa_adds_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1014    pwd->h[6]  = msa_adds_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1015    pwd->h[7]  = msa_adds_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1016}
1017
1018void helper_msa_adds_s_w(CPUMIPSState *env,
1019                         uint32_t wd, uint32_t ws, uint32_t wt)
1020{
1021    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1022    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1023    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1024
1025    pwd->w[0]  = msa_adds_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1026    pwd->w[1]  = msa_adds_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1027    pwd->w[2]  = msa_adds_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1028    pwd->w[3]  = msa_adds_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1029}
1030
1031void helper_msa_adds_s_d(CPUMIPSState *env,
1032                         uint32_t wd, uint32_t ws, uint32_t wt)
1033{
1034    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1035    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1036    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1037
1038    pwd->d[0]  = msa_adds_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1039    pwd->d[1]  = msa_adds_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1040}
1041
1042
/*
 * Unsigned saturating add of two elements.
 *
 * Both operands are first reduced with UNSIGNED(..., df); the sum is then
 * clamped to DF_MAX_UINT(df).  The check subtracts from the limit so the
 * sum is only formed when it stays below the limit.
 */
static inline uint64_t msa_adds_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t limit = DF_MAX_UINT(df);
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs < limit - rhs) {
        return lhs + rhs;
    }
    return limit;
}
1050
1051void helper_msa_adds_u_b(CPUMIPSState *env,
1052                         uint32_t wd, uint32_t ws, uint32_t wt)
1053{
1054    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1055    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1056    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1057
1058    pwd->b[0]  = msa_adds_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1059    pwd->b[1]  = msa_adds_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1060    pwd->b[2]  = msa_adds_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1061    pwd->b[3]  = msa_adds_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1062    pwd->b[4]  = msa_adds_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1063    pwd->b[5]  = msa_adds_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1064    pwd->b[6]  = msa_adds_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1065    pwd->b[7]  = msa_adds_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1066    pwd->b[8]  = msa_adds_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1067    pwd->b[9]  = msa_adds_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1068    pwd->b[10] = msa_adds_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1069    pwd->b[11] = msa_adds_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1070    pwd->b[12] = msa_adds_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1071    pwd->b[13] = msa_adds_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1072    pwd->b[14] = msa_adds_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1073    pwd->b[15] = msa_adds_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1074}
1075
1076void helper_msa_adds_u_h(CPUMIPSState *env,
1077                         uint32_t wd, uint32_t ws, uint32_t wt)
1078{
1079    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1080    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1081    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1082
1083    pwd->h[0]  = msa_adds_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1084    pwd->h[1]  = msa_adds_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1085    pwd->h[2]  = msa_adds_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1086    pwd->h[3]  = msa_adds_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1087    pwd->h[4]  = msa_adds_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1088    pwd->h[5]  = msa_adds_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1089    pwd->h[6]  = msa_adds_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1090    pwd->h[7]  = msa_adds_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1091}
1092
1093void helper_msa_adds_u_w(CPUMIPSState *env,
1094                         uint32_t wd, uint32_t ws, uint32_t wt)
1095{
1096    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1097    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1098    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1099
1100    pwd->w[0]  = msa_adds_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1101    pwd->w[1]  = msa_adds_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1102    pwd->w[2]  = msa_adds_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1103    pwd->w[3]  = msa_adds_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1104}
1105
1106void helper_msa_adds_u_d(CPUMIPSState *env,
1107                         uint32_t wd, uint32_t ws, uint32_t wt)
1108{
1109    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1110    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1111    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1112
1113    pwd->d[0]  = msa_adds_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1114    pwd->d[1]  = msa_adds_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1115}
1116
1117
/*
 * Plain (wrapping) add of two elements.
 *
 * df is accepted for signature uniformity with the other per-element
 * helpers but is not used; the caller's store into the destination element
 * truncates the sum to the element width.
 *
 * The addition is carried out in uint64_t so that doubleword operands whose
 * sum exceeds the int64_t range wrap modulo 2^64 instead of overflowing a
 * signed addition.
 */
static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    return (int64_t)((uint64_t)arg1 + (uint64_t)arg2);
}
1122
1123void helper_msa_addv_b(CPUMIPSState *env,
1124                       uint32_t wd, uint32_t ws, uint32_t wt)
1125{
1126    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1127    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1128    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1129
1130    pwd->b[0]  = msa_addv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1131    pwd->b[1]  = msa_addv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1132    pwd->b[2]  = msa_addv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1133    pwd->b[3]  = msa_addv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1134    pwd->b[4]  = msa_addv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1135    pwd->b[5]  = msa_addv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1136    pwd->b[6]  = msa_addv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1137    pwd->b[7]  = msa_addv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1138    pwd->b[8]  = msa_addv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1139    pwd->b[9]  = msa_addv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1140    pwd->b[10] = msa_addv_df(DF_BYTE, pws->b[10], pwt->b[10]);
1141    pwd->b[11] = msa_addv_df(DF_BYTE, pws->b[11], pwt->b[11]);
1142    pwd->b[12] = msa_addv_df(DF_BYTE, pws->b[12], pwt->b[12]);
1143    pwd->b[13] = msa_addv_df(DF_BYTE, pws->b[13], pwt->b[13]);
1144    pwd->b[14] = msa_addv_df(DF_BYTE, pws->b[14], pwt->b[14]);
1145    pwd->b[15] = msa_addv_df(DF_BYTE, pws->b[15], pwt->b[15]);
1146}
1147
1148void helper_msa_addv_h(CPUMIPSState *env,
1149                       uint32_t wd, uint32_t ws, uint32_t wt)
1150{
1151    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1152    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1153    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1154
1155    pwd->h[0]  = msa_addv_df(DF_HALF, pws->h[0],  pwt->h[0]);
1156    pwd->h[1]  = msa_addv_df(DF_HALF, pws->h[1],  pwt->h[1]);
1157    pwd->h[2]  = msa_addv_df(DF_HALF, pws->h[2],  pwt->h[2]);
1158    pwd->h[3]  = msa_addv_df(DF_HALF, pws->h[3],  pwt->h[3]);
1159    pwd->h[4]  = msa_addv_df(DF_HALF, pws->h[4],  pwt->h[4]);
1160    pwd->h[5]  = msa_addv_df(DF_HALF, pws->h[5],  pwt->h[5]);
1161    pwd->h[6]  = msa_addv_df(DF_HALF, pws->h[6],  pwt->h[6]);
1162    pwd->h[7]  = msa_addv_df(DF_HALF, pws->h[7],  pwt->h[7]);
1163}
1164
1165void helper_msa_addv_w(CPUMIPSState *env,
1166                       uint32_t wd, uint32_t ws, uint32_t wt)
1167{
1168    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1169    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1170    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1171
1172    pwd->w[0]  = msa_addv_df(DF_WORD, pws->w[0],  pwt->w[0]);
1173    pwd->w[1]  = msa_addv_df(DF_WORD, pws->w[1],  pwt->w[1]);
1174    pwd->w[2]  = msa_addv_df(DF_WORD, pws->w[2],  pwt->w[2]);
1175    pwd->w[3]  = msa_addv_df(DF_WORD, pws->w[3],  pwt->w[3]);
1176}
1177
1178void helper_msa_addv_d(CPUMIPSState *env,
1179                       uint32_t wd, uint32_t ws, uint32_t wt)
1180{
1181    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1182    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1183    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1184
1185    pwd->d[0]  = msa_addv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1186    pwd->d[1]  = msa_addv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1187}
1188
1189
/*
 * Half-element extraction macros.
 *
 * Each macro picks one half of a DF_BITS(df)-wide element held in the low
 * bits of a and widens it to 64 bits.  "EVEN" selects the low half of the
 * element and "ODD" the high half.  The SIGNED variants sign-extend (they
 * rely on >> of a negative int64_t being an arithmetic shift); the
 * UNSIGNED variants zero-extend.
 */

/* Low DF_BITS(df) / 2 bits of a, sign-extended to int64_t. */
#define SIGNED_EVEN(a, df) \
        ((((int64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))

/* Low DF_BITS(df) / 2 bits of a, zero-extended to uint64_t. */
#define UNSIGNED_EVEN(a, df) \
        ((((uint64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))

/*
 * High half of the element: the left shift moves the element's top bit to
 * bit 63, the right shift brings the upper DF_BITS(df) / 2 bits down to
 * bit 0 with sign extension.
 */
#define SIGNED_ODD(a, df) \
        ((((int64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))

/* High half of the element, as above but zero-extended. */
#define UNSIGNED_ODD(a, df) \
        ((((uint64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))
1201
1202
/*
 * Signed horizontal add for one destination element: the sign-extended
 * high half of arg1 (SIGNED_ODD) plus the sign-extended low half of arg2
 * (SIGNED_EVEN).
 */
static inline int64_t msa_hadd_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t odd_part = SIGNED_ODD(arg1, df);
    int64_t even_part = SIGNED_EVEN(arg2, df);

    return odd_part + even_part;
}
1207
1208void helper_msa_hadd_s_h(CPUMIPSState *env,
1209                         uint32_t wd, uint32_t ws, uint32_t wt)
1210{
1211    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1212    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1213    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1214
1215    pwd->h[0]  = msa_hadd_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1216    pwd->h[1]  = msa_hadd_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1217    pwd->h[2]  = msa_hadd_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1218    pwd->h[3]  = msa_hadd_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1219    pwd->h[4]  = msa_hadd_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1220    pwd->h[5]  = msa_hadd_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1221    pwd->h[6]  = msa_hadd_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1222    pwd->h[7]  = msa_hadd_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1223}
1224
1225void helper_msa_hadd_s_w(CPUMIPSState *env,
1226                         uint32_t wd, uint32_t ws, uint32_t wt)
1227{
1228    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1229    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1230    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1231
1232    pwd->w[0]  = msa_hadd_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1233    pwd->w[1]  = msa_hadd_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1234    pwd->w[2]  = msa_hadd_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1235    pwd->w[3]  = msa_hadd_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1236}
1237
1238void helper_msa_hadd_s_d(CPUMIPSState *env,
1239                         uint32_t wd, uint32_t ws, uint32_t wt)
1240{
1241    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1242    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1243    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1244
1245    pwd->d[0]  = msa_hadd_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1246    pwd->d[1]  = msa_hadd_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1247}
1248
1249
/*
 * Unsigned horizontal add for one destination element: the zero-extended
 * high half of arg1 (UNSIGNED_ODD) plus the zero-extended low half of arg2
 * (UNSIGNED_EVEN).
 */
static inline int64_t msa_hadd_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t odd_part = UNSIGNED_ODD(arg1, df);
    uint64_t even_part = UNSIGNED_EVEN(arg2, df);

    return odd_part + even_part;
}
1254
1255void helper_msa_hadd_u_h(CPUMIPSState *env,
1256                         uint32_t wd, uint32_t ws, uint32_t wt)
1257{
1258    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1259    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1260    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1261
1262    pwd->h[0]  = msa_hadd_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1263    pwd->h[1]  = msa_hadd_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1264    pwd->h[2]  = msa_hadd_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1265    pwd->h[3]  = msa_hadd_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1266    pwd->h[4]  = msa_hadd_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1267    pwd->h[5]  = msa_hadd_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1268    pwd->h[6]  = msa_hadd_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1269    pwd->h[7]  = msa_hadd_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1270}
1271
1272void helper_msa_hadd_u_w(CPUMIPSState *env,
1273                         uint32_t wd, uint32_t ws, uint32_t wt)
1274{
1275    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1276    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1277    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1278
1279    pwd->w[0]  = msa_hadd_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1280    pwd->w[1]  = msa_hadd_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1281    pwd->w[2]  = msa_hadd_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1282    pwd->w[3]  = msa_hadd_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1283}
1284
1285void helper_msa_hadd_u_d(CPUMIPSState *env,
1286                         uint32_t wd, uint32_t ws, uint32_t wt)
1287{
1288    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1289    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1290    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1291
1292    pwd->d[0]  = msa_hadd_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1293    pwd->d[1]  = msa_hadd_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1294}
1295
1296
1297/*
1298 * Int Average
1299 * -----------
1300 *
1301 * +---------------+----------------------------------------------------------+
1302 * | AVE_S.B       | Vector Signed Average (byte)                             |
1303 * | AVE_S.H       | Vector Signed Average (halfword)                         |
1304 * | AVE_S.W       | Vector Signed Average (word)                             |
1305 * | AVE_S.D       | Vector Signed Average (doubleword)                       |
1306 * | AVE_U.B       | Vector Unsigned Average (byte)                           |
1307 * | AVE_U.H       | Vector Unsigned Average (halfword)                       |
1308 * | AVE_U.W       | Vector Unsigned Average (word)                           |
1309 * | AVE_U.D       | Vector Unsigned Average (doubleword)                     |
1310 * | AVER_S.B      | Vector Signed Average Rounded (byte)                     |
1311 * | AVER_S.H      | Vector Signed Average Rounded (halfword)                 |
1312 * | AVER_S.W      | Vector Signed Average Rounded (word)                     |
1313 * | AVER_S.D      | Vector Signed Average Rounded (doubleword)               |
1314 * | AVER_U.B      | Vector Unsigned Average Rounded (byte)                   |
1315 * | AVER_U.H      | Vector Unsigned Average Rounded (halfword)               |
1316 * | AVER_U.W      | Vector Unsigned Average Rounded (word)                   |
1317 * | AVER_U.D      | Vector Unsigned Average Rounded (doubleword)             |
1318 * +---------------+----------------------------------------------------------+
1319 */
1320
/*
 * Signed average of two elements, computed without forming the full sum:
 * each operand is halved with a signed right shift, and one is added back
 * when both operands have their lowest bit set.  df is not used.
 */
static inline int64_t msa_ave_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /* signed shifts */
    int64_t half1 = arg1 >> 1;
    int64_t half2 = arg2 >> 1;
    /* both halvings dropped a set low bit: together they are worth one */
    int64_t carry = arg1 & arg2 & 1;

    return half1 + half2 + carry;
}
1326
1327void helper_msa_ave_s_b(CPUMIPSState *env,
1328                        uint32_t wd, uint32_t ws, uint32_t wt)
1329{
1330    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1331    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1332    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1333
1334    pwd->b[0]  = msa_ave_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1335    pwd->b[1]  = msa_ave_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1336    pwd->b[2]  = msa_ave_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1337    pwd->b[3]  = msa_ave_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1338    pwd->b[4]  = msa_ave_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1339    pwd->b[5]  = msa_ave_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1340    pwd->b[6]  = msa_ave_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1341    pwd->b[7]  = msa_ave_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1342    pwd->b[8]  = msa_ave_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1343    pwd->b[9]  = msa_ave_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1344    pwd->b[10] = msa_ave_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1345    pwd->b[11] = msa_ave_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1346    pwd->b[12] = msa_ave_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1347    pwd->b[13] = msa_ave_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1348    pwd->b[14] = msa_ave_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1349    pwd->b[15] = msa_ave_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1350}
1351
1352void helper_msa_ave_s_h(CPUMIPSState *env,
1353                        uint32_t wd, uint32_t ws, uint32_t wt)
1354{
1355    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1356    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1357    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1358
1359    pwd->h[0]  = msa_ave_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1360    pwd->h[1]  = msa_ave_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1361    pwd->h[2]  = msa_ave_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1362    pwd->h[3]  = msa_ave_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1363    pwd->h[4]  = msa_ave_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1364    pwd->h[5]  = msa_ave_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1365    pwd->h[6]  = msa_ave_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1366    pwd->h[7]  = msa_ave_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1367}
1368
1369void helper_msa_ave_s_w(CPUMIPSState *env,
1370                        uint32_t wd, uint32_t ws, uint32_t wt)
1371{
1372    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1373    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1374    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1375
1376    pwd->w[0]  = msa_ave_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1377    pwd->w[1]  = msa_ave_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1378    pwd->w[2]  = msa_ave_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1379    pwd->w[3]  = msa_ave_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1380}
1381
1382void helper_msa_ave_s_d(CPUMIPSState *env,
1383                        uint32_t wd, uint32_t ws, uint32_t wt)
1384{
1385    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1386    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1387    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1388
1389    pwd->d[0]  = msa_ave_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1390    pwd->d[1]  = msa_ave_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1391}
1392
/*
 * Unsigned average of two elements without forming the full sum.
 *
 * Both operands are first masked with UNSIGNED() for the given data
 * format, then halved with an unsigned shift; the final term adds back one
 * when both masked operands had their low bit set.
 */
static inline uint64_t msa_ave_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);
    uint64_t avg;

    /* unsigned shift on both halves */
    avg = lhs >> 1;
    avg += rhs >> 1;
    /* 1 only when both discarded low bits were set */
    avg += lhs & rhs & 1;

    return avg;
}
1400
1401void helper_msa_ave_u_b(CPUMIPSState *env,
1402                        uint32_t wd, uint32_t ws, uint32_t wt)
1403{
1404    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1405    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1406    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1407
1408    pwd->b[0]  = msa_ave_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1409    pwd->b[1]  = msa_ave_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1410    pwd->b[2]  = msa_ave_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1411    pwd->b[3]  = msa_ave_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1412    pwd->b[4]  = msa_ave_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1413    pwd->b[5]  = msa_ave_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1414    pwd->b[6]  = msa_ave_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1415    pwd->b[7]  = msa_ave_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1416    pwd->b[8]  = msa_ave_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1417    pwd->b[9]  = msa_ave_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1418    pwd->b[10] = msa_ave_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1419    pwd->b[11] = msa_ave_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1420    pwd->b[12] = msa_ave_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1421    pwd->b[13] = msa_ave_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1422    pwd->b[14] = msa_ave_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1423    pwd->b[15] = msa_ave_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1424}
1425
1426void helper_msa_ave_u_h(CPUMIPSState *env,
1427                        uint32_t wd, uint32_t ws, uint32_t wt)
1428{
1429    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1430    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1431    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1432
1433    pwd->h[0]  = msa_ave_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1434    pwd->h[1]  = msa_ave_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1435    pwd->h[2]  = msa_ave_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1436    pwd->h[3]  = msa_ave_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1437    pwd->h[4]  = msa_ave_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1438    pwd->h[5]  = msa_ave_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1439    pwd->h[6]  = msa_ave_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1440    pwd->h[7]  = msa_ave_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1441}
1442
1443void helper_msa_ave_u_w(CPUMIPSState *env,
1444                        uint32_t wd, uint32_t ws, uint32_t wt)
1445{
1446    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1447    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1448    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1449
1450    pwd->w[0]  = msa_ave_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1451    pwd->w[1]  = msa_ave_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1452    pwd->w[2]  = msa_ave_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1453    pwd->w[3]  = msa_ave_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1454}
1455
1456void helper_msa_ave_u_d(CPUMIPSState *env,
1457                        uint32_t wd, uint32_t ws, uint32_t wt)
1458{
1459    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1460    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1461    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1462
1463    pwd->d[0]  = msa_ave_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1464    pwd->d[1]  = msa_ave_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1465}
1466
/*
 * Signed rounded average of two elements without forming the full sum.
 *
 * Each operand is halved with a signed right shift before the addition;
 * the final term adds one when either operand had its low bit set.
 * df is accepted for symmetry with the other *_df kernels and is not used.
 */
static inline int64_t msa_aver_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /* signed shift on both halves */
    int64_t half1 = arg1 >> 1;
    int64_t half2 = arg2 >> 1;
    /* 1 when at least one discarded low bit was set */
    int64_t round = (arg1 | arg2) & 1;

    return half1 + half2 + round;
}
1472
1473void helper_msa_aver_s_b(CPUMIPSState *env,
1474                         uint32_t wd, uint32_t ws, uint32_t wt)
1475{
1476    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1477    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1478    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1479
1480    pwd->b[0]  = msa_aver_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1481    pwd->b[1]  = msa_aver_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1482    pwd->b[2]  = msa_aver_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1483    pwd->b[3]  = msa_aver_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1484    pwd->b[4]  = msa_aver_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1485    pwd->b[5]  = msa_aver_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1486    pwd->b[6]  = msa_aver_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1487    pwd->b[7]  = msa_aver_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1488    pwd->b[8]  = msa_aver_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1489    pwd->b[9]  = msa_aver_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1490    pwd->b[10] = msa_aver_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1491    pwd->b[11] = msa_aver_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1492    pwd->b[12] = msa_aver_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1493    pwd->b[13] = msa_aver_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1494    pwd->b[14] = msa_aver_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1495    pwd->b[15] = msa_aver_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1496}
1497
1498void helper_msa_aver_s_h(CPUMIPSState *env,
1499                         uint32_t wd, uint32_t ws, uint32_t wt)
1500{
1501    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1502    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1503    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1504
1505    pwd->h[0]  = msa_aver_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1506    pwd->h[1]  = msa_aver_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1507    pwd->h[2]  = msa_aver_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1508    pwd->h[3]  = msa_aver_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1509    pwd->h[4]  = msa_aver_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1510    pwd->h[5]  = msa_aver_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1511    pwd->h[6]  = msa_aver_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1512    pwd->h[7]  = msa_aver_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1513}
1514
1515void helper_msa_aver_s_w(CPUMIPSState *env,
1516                         uint32_t wd, uint32_t ws, uint32_t wt)
1517{
1518    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1519    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1520    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1521
1522    pwd->w[0]  = msa_aver_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1523    pwd->w[1]  = msa_aver_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1524    pwd->w[2]  = msa_aver_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1525    pwd->w[3]  = msa_aver_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1526}
1527
1528void helper_msa_aver_s_d(CPUMIPSState *env,
1529                         uint32_t wd, uint32_t ws, uint32_t wt)
1530{
1531    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1532    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1533    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1534
1535    pwd->d[0]  = msa_aver_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1536    pwd->d[1]  = msa_aver_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1537}
1538
/*
 * Unsigned rounded average of two elements without forming the full sum.
 *
 * Both operands are first masked with UNSIGNED() for the given data
 * format, then halved with an unsigned shift; the final term adds one when
 * either masked operand had its low bit set.
 */
static inline uint64_t msa_aver_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);
    uint64_t avg;

    /* unsigned shift on both halves */
    avg = lhs >> 1;
    avg += rhs >> 1;
    /* 1 when at least one discarded low bit was set */
    avg += (lhs | rhs) & 1;

    return avg;
}
1546
1547void helper_msa_aver_u_b(CPUMIPSState *env,
1548                         uint32_t wd, uint32_t ws, uint32_t wt)
1549{
1550    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1551    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1552    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1553
1554    pwd->b[0]  = msa_aver_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1555    pwd->b[1]  = msa_aver_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1556    pwd->b[2]  = msa_aver_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1557    pwd->b[3]  = msa_aver_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1558    pwd->b[4]  = msa_aver_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1559    pwd->b[5]  = msa_aver_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1560    pwd->b[6]  = msa_aver_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1561    pwd->b[7]  = msa_aver_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1562    pwd->b[8]  = msa_aver_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1563    pwd->b[9]  = msa_aver_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1564    pwd->b[10] = msa_aver_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1565    pwd->b[11] = msa_aver_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1566    pwd->b[12] = msa_aver_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1567    pwd->b[13] = msa_aver_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1568    pwd->b[14] = msa_aver_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1569    pwd->b[15] = msa_aver_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1570}
1571
1572void helper_msa_aver_u_h(CPUMIPSState *env,
1573                         uint32_t wd, uint32_t ws, uint32_t wt)
1574{
1575    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1576    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1577    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1578
1579    pwd->h[0]  = msa_aver_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1580    pwd->h[1]  = msa_aver_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1581    pwd->h[2]  = msa_aver_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1582    pwd->h[3]  = msa_aver_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1583    pwd->h[4]  = msa_aver_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1584    pwd->h[5]  = msa_aver_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1585    pwd->h[6]  = msa_aver_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1586    pwd->h[7]  = msa_aver_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1587}
1588
1589void helper_msa_aver_u_w(CPUMIPSState *env,
1590                         uint32_t wd, uint32_t ws, uint32_t wt)
1591{
1592    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1593    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1594    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1595
1596    pwd->w[0]  = msa_aver_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1597    pwd->w[1]  = msa_aver_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1598    pwd->w[2]  = msa_aver_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1599    pwd->w[3]  = msa_aver_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1600}
1601
1602void helper_msa_aver_u_d(CPUMIPSState *env,
1603                         uint32_t wd, uint32_t ws, uint32_t wt)
1604{
1605    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1606    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1607    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1608
1609    pwd->d[0]  = msa_aver_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1610    pwd->d[1]  = msa_aver_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1611}
1612
1613
1614/*
1615 * Int Compare
1616 * -----------
1617 *
1618 * +---------------+----------------------------------------------------------+
1619 * | CEQ.B         | Vector Compare Equal (byte)                              |
1620 * | CEQ.H         | Vector Compare Equal (halfword)                          |
1621 * | CEQ.W         | Vector Compare Equal (word)                              |
1622 * | CEQ.D         | Vector Compare Equal (doubleword)                        |
1623 * | CLE_S.B       | Vector Compare Signed Less Than or Equal (byte)          |
1624 * | CLE_S.H       | Vector Compare Signed Less Than or Equal (halfword)      |
1625 * | CLE_S.W       | Vector Compare Signed Less Than or Equal (word)          |
1626 * | CLE_S.D       | Vector Compare Signed Less Than or Equal (doubleword)    |
1627 * | CLE_U.B       | Vector Compare Unsigned Less Than or Equal (byte)        |
1628 * | CLE_U.H       | Vector Compare Unsigned Less Than or Equal (halfword)    |
1629 * | CLE_U.W       | Vector Compare Unsigned Less Than or Equal (word)        |
1630 * | CLE_U.D       | Vector Compare Unsigned Less Than or Equal (doubleword)  |
1631 * | CLT_S.B       | Vector Compare Signed Less Than (byte)                   |
1632 * | CLT_S.H       | Vector Compare Signed Less Than (halfword)               |
1633 * | CLT_S.W       | Vector Compare Signed Less Than (word)                   |
1634 * | CLT_S.D       | Vector Compare Signed Less Than (doubleword)             |
1635 * | CLT_U.B       | Vector Compare Unsigned Less Than (byte)                 |
1636 * | CLT_U.H       | Vector Compare Unsigned Less Than (halfword)             |
1637 * | CLT_U.W       | Vector Compare Unsigned Less Than (word)                 |
1638 * | CLT_U.D       | Vector Compare Unsigned Less Than (doubleword)           |
1639 * +---------------+----------------------------------------------------------+
1640 */
1641
/*
 * Equality compare: returns -1 when arg1 equals arg2, otherwise 0.
 * df is not used by this kernel.
 */
static inline int64_t msa_ceq_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1646
/* Equality compare on int8_t values: -1 when equal, otherwise 0. */
static inline int8_t msa_ceq_b(int8_t arg1, int8_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1651
1652void helper_msa_ceq_b(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1653{
1654    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1655    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1656    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1657
1658    pwd->b[0]  = msa_ceq_b(pws->b[0],  pwt->b[0]);
1659    pwd->b[1]  = msa_ceq_b(pws->b[1],  pwt->b[1]);
1660    pwd->b[2]  = msa_ceq_b(pws->b[2],  pwt->b[2]);
1661    pwd->b[3]  = msa_ceq_b(pws->b[3],  pwt->b[3]);
1662    pwd->b[4]  = msa_ceq_b(pws->b[4],  pwt->b[4]);
1663    pwd->b[5]  = msa_ceq_b(pws->b[5],  pwt->b[5]);
1664    pwd->b[6]  = msa_ceq_b(pws->b[6],  pwt->b[6]);
1665    pwd->b[7]  = msa_ceq_b(pws->b[7],  pwt->b[7]);
1666    pwd->b[8]  = msa_ceq_b(pws->b[8],  pwt->b[8]);
1667    pwd->b[9]  = msa_ceq_b(pws->b[9],  pwt->b[9]);
1668    pwd->b[10] = msa_ceq_b(pws->b[10], pwt->b[10]);
1669    pwd->b[11] = msa_ceq_b(pws->b[11], pwt->b[11]);
1670    pwd->b[12] = msa_ceq_b(pws->b[12], pwt->b[12]);
1671    pwd->b[13] = msa_ceq_b(pws->b[13], pwt->b[13]);
1672    pwd->b[14] = msa_ceq_b(pws->b[14], pwt->b[14]);
1673    pwd->b[15] = msa_ceq_b(pws->b[15], pwt->b[15]);
1674}
1675
/* Equality compare on int16_t values: -1 when equal, otherwise 0. */
static inline int16_t msa_ceq_h(int16_t arg1, int16_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1680
1681void helper_msa_ceq_h(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1682{
1683    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1684    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1685    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1686
1687    pwd->h[0]  = msa_ceq_h(pws->h[0],  pwt->h[0]);
1688    pwd->h[1]  = msa_ceq_h(pws->h[1],  pwt->h[1]);
1689    pwd->h[2]  = msa_ceq_h(pws->h[2],  pwt->h[2]);
1690    pwd->h[3]  = msa_ceq_h(pws->h[3],  pwt->h[3]);
1691    pwd->h[4]  = msa_ceq_h(pws->h[4],  pwt->h[4]);
1692    pwd->h[5]  = msa_ceq_h(pws->h[5],  pwt->h[5]);
1693    pwd->h[6]  = msa_ceq_h(pws->h[6],  pwt->h[6]);
1694    pwd->h[7]  = msa_ceq_h(pws->h[7],  pwt->h[7]);
1695}
1696
/* Equality compare on int32_t values: -1 when equal, otherwise 0. */
static inline int32_t msa_ceq_w(int32_t arg1, int32_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1701
1702void helper_msa_ceq_w(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1703{
1704    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1705    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1706    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1707
1708    pwd->w[0]  = msa_ceq_w(pws->w[0],  pwt->w[0]);
1709    pwd->w[1]  = msa_ceq_w(pws->w[1],  pwt->w[1]);
1710    pwd->w[2]  = msa_ceq_w(pws->w[2],  pwt->w[2]);
1711    pwd->w[3]  = msa_ceq_w(pws->w[3],  pwt->w[3]);
1712}
1713
/* Equality compare on int64_t values: -1 when equal, otherwise 0. */
static inline int64_t msa_ceq_d(int64_t arg1, int64_t arg2)
{
    if (arg1 != arg2) {
        return 0;
    }
    return -1;
}
1718
1719void helper_msa_ceq_d(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
1720{
1721    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1722    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1723    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1724
1725    pwd->d[0]  = msa_ceq_d(pws->d[0],  pwt->d[0]);
1726    pwd->d[1]  = msa_ceq_d(pws->d[1],  pwt->d[1]);
1727}
1728
/*
 * Signed less-than-or-equal compare: returns -1 when arg1 <= arg2,
 * otherwise 0.  df is not used by this kernel.
 */
static inline int64_t msa_cle_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 > arg2) {
        return 0;
    }
    return -1;
}
1733
1734void helper_msa_cle_s_b(CPUMIPSState *env,
1735                        uint32_t wd, uint32_t ws, uint32_t wt)
1736{
1737    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1738    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1739    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1740
1741    pwd->b[0]  = msa_cle_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1742    pwd->b[1]  = msa_cle_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1743    pwd->b[2]  = msa_cle_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1744    pwd->b[3]  = msa_cle_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1745    pwd->b[4]  = msa_cle_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1746    pwd->b[5]  = msa_cle_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1747    pwd->b[6]  = msa_cle_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1748    pwd->b[7]  = msa_cle_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1749    pwd->b[8]  = msa_cle_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1750    pwd->b[9]  = msa_cle_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1751    pwd->b[10] = msa_cle_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
1752    pwd->b[11] = msa_cle_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
1753    pwd->b[12] = msa_cle_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
1754    pwd->b[13] = msa_cle_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
1755    pwd->b[14] = msa_cle_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
1756    pwd->b[15] = msa_cle_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
1757}
1758
1759void helper_msa_cle_s_h(CPUMIPSState *env,
1760                        uint32_t wd, uint32_t ws, uint32_t wt)
1761{
1762    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1763    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1764    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1765
1766    pwd->h[0]  = msa_cle_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
1767    pwd->h[1]  = msa_cle_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
1768    pwd->h[2]  = msa_cle_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
1769    pwd->h[3]  = msa_cle_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
1770    pwd->h[4]  = msa_cle_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
1771    pwd->h[5]  = msa_cle_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
1772    pwd->h[6]  = msa_cle_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
1773    pwd->h[7]  = msa_cle_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
1774}
1775
1776void helper_msa_cle_s_w(CPUMIPSState *env,
1777                        uint32_t wd, uint32_t ws, uint32_t wt)
1778{
1779    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1780    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1781    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1782
1783    pwd->w[0]  = msa_cle_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
1784    pwd->w[1]  = msa_cle_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
1785    pwd->w[2]  = msa_cle_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
1786    pwd->w[3]  = msa_cle_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
1787}
1788
1789void helper_msa_cle_s_d(CPUMIPSState *env,
1790                        uint32_t wd, uint32_t ws, uint32_t wt)
1791{
1792    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1793    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1794    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1795
1796    pwd->d[0]  = msa_cle_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1797    pwd->d[1]  = msa_cle_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1798}
1799
/*
 * Unsigned less-than-or-equal compare.
 *
 * Both operands are masked with UNSIGNED() for the given data format and
 * compared as uint64_t; returns -1 when the first is <= the second,
 * otherwise 0.
 */
static inline int64_t msa_cle_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs > rhs) {
        return 0;
    }
    return -1;
}
1806
1807void helper_msa_cle_u_b(CPUMIPSState *env,
1808                        uint32_t wd, uint32_t ws, uint32_t wt)
1809{
1810    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1811    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1812    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1813
1814    pwd->b[0]  = msa_cle_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1815    pwd->b[1]  = msa_cle_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1816    pwd->b[2]  = msa_cle_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1817    pwd->b[3]  = msa_cle_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1818    pwd->b[4]  = msa_cle_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1819    pwd->b[5]  = msa_cle_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1820    pwd->b[6]  = msa_cle_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1821    pwd->b[7]  = msa_cle_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1822    pwd->b[8]  = msa_cle_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1823    pwd->b[9]  = msa_cle_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1824    pwd->b[10] = msa_cle_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1825    pwd->b[11] = msa_cle_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1826    pwd->b[12] = msa_cle_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1827    pwd->b[13] = msa_cle_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1828    pwd->b[14] = msa_cle_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1829    pwd->b[15] = msa_cle_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1830}
1831
1832void helper_msa_cle_u_h(CPUMIPSState *env,
1833                        uint32_t wd, uint32_t ws, uint32_t wt)
1834{
1835    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1836    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1837    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1838
1839    pwd->h[0]  = msa_cle_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
1840    pwd->h[1]  = msa_cle_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
1841    pwd->h[2]  = msa_cle_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
1842    pwd->h[3]  = msa_cle_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
1843    pwd->h[4]  = msa_cle_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
1844    pwd->h[5]  = msa_cle_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
1845    pwd->h[6]  = msa_cle_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
1846    pwd->h[7]  = msa_cle_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
1847}
1848
1849void helper_msa_cle_u_w(CPUMIPSState *env,
1850                        uint32_t wd, uint32_t ws, uint32_t wt)
1851{
1852    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1853    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1854    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1855
1856    pwd->w[0]  = msa_cle_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
1857    pwd->w[1]  = msa_cle_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
1858    pwd->w[2]  = msa_cle_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
1859    pwd->w[3]  = msa_cle_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
1860}
1861
1862void helper_msa_cle_u_d(CPUMIPSState *env,
1863                        uint32_t wd, uint32_t ws, uint32_t wt)
1864{
1865    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1866    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1867    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1868
1869    pwd->d[0]  = msa_cle_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
1870    pwd->d[1]  = msa_cle_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
1871}
1872
/*
 * Signed less-than compare: returns -1 when arg1 < arg2, otherwise 0.
 * df is not used by this kernel.
 */
static inline int64_t msa_clt_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 >= arg2) {
        return 0;
    }
    return -1;
}
1877
/* Signed less-than compare on int8_t values: -1 when arg1 < arg2, else 0. */
static inline int8_t msa_clt_s_b(int8_t arg1, int8_t arg2)
{
    if (arg1 >= arg2) {
        return 0;
    }
    return -1;
}
1882
1883void helper_msa_clt_s_b(CPUMIPSState *env,
1884                        uint32_t wd, uint32_t ws, uint32_t wt)
1885{
1886    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1887    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1888    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1889
1890    pwd->b[0]  = msa_clt_s_b(pws->b[0],  pwt->b[0]);
1891    pwd->b[1]  = msa_clt_s_b(pws->b[1],  pwt->b[1]);
1892    pwd->b[2]  = msa_clt_s_b(pws->b[2],  pwt->b[2]);
1893    pwd->b[3]  = msa_clt_s_b(pws->b[3],  pwt->b[3]);
1894    pwd->b[4]  = msa_clt_s_b(pws->b[4],  pwt->b[4]);
1895    pwd->b[5]  = msa_clt_s_b(pws->b[5],  pwt->b[5]);
1896    pwd->b[6]  = msa_clt_s_b(pws->b[6],  pwt->b[6]);
1897    pwd->b[7]  = msa_clt_s_b(pws->b[7],  pwt->b[7]);
1898    pwd->b[8]  = msa_clt_s_b(pws->b[8],  pwt->b[8]);
1899    pwd->b[9]  = msa_clt_s_b(pws->b[9],  pwt->b[9]);
1900    pwd->b[10] = msa_clt_s_b(pws->b[10], pwt->b[10]);
1901    pwd->b[11] = msa_clt_s_b(pws->b[11], pwt->b[11]);
1902    pwd->b[12] = msa_clt_s_b(pws->b[12], pwt->b[12]);
1903    pwd->b[13] = msa_clt_s_b(pws->b[13], pwt->b[13]);
1904    pwd->b[14] = msa_clt_s_b(pws->b[14], pwt->b[14]);
1905    pwd->b[15] = msa_clt_s_b(pws->b[15], pwt->b[15]);
1906}
1907
/* Signed less-than compare on int16_t values: -1 when arg1 < arg2, else 0. */
static inline int16_t msa_clt_s_h(int16_t arg1, int16_t arg2)
{
    if (arg1 >= arg2) {
        return 0;
    }
    return -1;
}
1912
1913void helper_msa_clt_s_h(CPUMIPSState *env,
1914                        uint32_t wd, uint32_t ws, uint32_t wt)
1915{
1916    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1917    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1918    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1919
1920    pwd->h[0]  = msa_clt_s_h(pws->h[0],  pwt->h[0]);
1921    pwd->h[1]  = msa_clt_s_h(pws->h[1],  pwt->h[1]);
1922    pwd->h[2]  = msa_clt_s_h(pws->h[2],  pwt->h[2]);
1923    pwd->h[3]  = msa_clt_s_h(pws->h[3],  pwt->h[3]);
1924    pwd->h[4]  = msa_clt_s_h(pws->h[4],  pwt->h[4]);
1925    pwd->h[5]  = msa_clt_s_h(pws->h[5],  pwt->h[5]);
1926    pwd->h[6]  = msa_clt_s_h(pws->h[6],  pwt->h[6]);
1927    pwd->h[7]  = msa_clt_s_h(pws->h[7],  pwt->h[7]);
1928}
1929
/* Signed less-than compare on int32_t values: -1 when arg1 < arg2, else 0. */
static inline int32_t msa_clt_s_w(int32_t arg1, int32_t arg2)
{
    if (arg1 >= arg2) {
        return 0;
    }
    return -1;
}
1934
1935void helper_msa_clt_s_w(CPUMIPSState *env,
1936                        uint32_t wd, uint32_t ws, uint32_t wt)
1937{
1938    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1939    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1940    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1941
1942    pwd->w[0]  = msa_clt_s_w(pws->w[0],  pwt->w[0]);
1943    pwd->w[1]  = msa_clt_s_w(pws->w[1],  pwt->w[1]);
1944    pwd->w[2]  = msa_clt_s_w(pws->w[2],  pwt->w[2]);
1945    pwd->w[3]  = msa_clt_s_w(pws->w[3],  pwt->w[3]);
1946}
1947
/* Signed less-than compare on int64_t values: -1 when arg1 < arg2, else 0. */
static inline int64_t msa_clt_s_d(int64_t arg1, int64_t arg2)
{
    if (arg1 >= arg2) {
        return 0;
    }
    return -1;
}
1952
1953void helper_msa_clt_s_d(CPUMIPSState *env,
1954                        uint32_t wd, uint32_t ws, uint32_t wt)
1955{
1956    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1957    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1958    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1959
1960    pwd->d[0]  = msa_clt_s_d(pws->d[0],  pwt->d[0]);
1961    pwd->d[1]  = msa_clt_s_d(pws->d[1],  pwt->d[1]);
1962}
1963
/*
 * Unsigned less-than compare.
 *
 * Both operands are masked with UNSIGNED() for the given data format and
 * compared as uint64_t; returns -1 when the first is < the second,
 * otherwise 0.
 */
static inline int64_t msa_clt_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs >= rhs) {
        return 0;
    }
    return -1;
}
1970
1971void helper_msa_clt_u_b(CPUMIPSState *env,
1972                        uint32_t wd, uint32_t ws, uint32_t wt)
1973{
1974    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
1975    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
1976    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
1977
1978    pwd->b[0]  = msa_clt_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
1979    pwd->b[1]  = msa_clt_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
1980    pwd->b[2]  = msa_clt_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
1981    pwd->b[3]  = msa_clt_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
1982    pwd->b[4]  = msa_clt_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
1983    pwd->b[5]  = msa_clt_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
1984    pwd->b[6]  = msa_clt_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
1985    pwd->b[7]  = msa_clt_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
1986    pwd->b[8]  = msa_clt_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
1987    pwd->b[9]  = msa_clt_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
1988    pwd->b[10] = msa_clt_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
1989    pwd->b[11] = msa_clt_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
1990    pwd->b[12] = msa_clt_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
1991    pwd->b[13] = msa_clt_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
1992    pwd->b[14] = msa_clt_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
1993    pwd->b[15] = msa_clt_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
1994}
1995
1996void helper_msa_clt_u_h(CPUMIPSState *env,
1997                        uint32_t wd, uint32_t ws, uint32_t wt)
1998{
1999    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2000    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2001    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2002
2003    pwd->h[0]  = msa_clt_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2004    pwd->h[1]  = msa_clt_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2005    pwd->h[2]  = msa_clt_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2006    pwd->h[3]  = msa_clt_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2007    pwd->h[4]  = msa_clt_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2008    pwd->h[5]  = msa_clt_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2009    pwd->h[6]  = msa_clt_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2010    pwd->h[7]  = msa_clt_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2011}
2012
2013void helper_msa_clt_u_w(CPUMIPSState *env,
2014                        uint32_t wd, uint32_t ws, uint32_t wt)
2015{
2016    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2017    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2018    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2019
2020    pwd->w[0]  = msa_clt_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2021    pwd->w[1]  = msa_clt_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2022    pwd->w[2]  = msa_clt_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2023    pwd->w[3]  = msa_clt_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2024}
2025
2026void helper_msa_clt_u_d(CPUMIPSState *env,
2027                        uint32_t wd, uint32_t ws, uint32_t wt)
2028{
2029    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2030    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2031    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2032
2033    pwd->d[0]  = msa_clt_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2034    pwd->d[1]  = msa_clt_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2035}
2036
2037
2038/*
2039 * Int Divide
2040 * ----------
2041 *
2042 * +---------------+----------------------------------------------------------+
2043 * | DIV_S.B       | Vector Signed Divide (byte)                              |
2044 * | DIV_S.H       | Vector Signed Divide (halfword)                          |
2045 * | DIV_S.W       | Vector Signed Divide (word)                              |
2046 * | DIV_S.D       | Vector Signed Divide (doubleword)                        |
2047 * | DIV_U.B       | Vector Unsigned Divide (byte)                            |
2048 * | DIV_U.H       | Vector Unsigned Divide (halfword)                        |
2049 * | DIV_U.W       | Vector Unsigned Divide (word)                            |
2050 * | DIV_U.D       | Vector Unsigned Divide (doubleword)                      |
2051 * +---------------+----------------------------------------------------------+
2052 */
2053
2054
/*
 * Signed division kernel with explicit results for the two inputs that
 * plain C division cannot handle.
 *
 *  - divisor 0: returns -1 when arg1 >= 0, otherwise 1
 *  - arg1 == DF_MIN_INT(df) and divisor -1: returns DF_MIN_INT(df)
 *  - anything else: arg1 / arg2
 */
static inline int64_t msa_div_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /* A zero divisor can never match the overflow case, so test it first. */
    if (arg2 == 0) {
        return arg1 >= 0 ? -1 : 1;
    }

    /* Most negative value divided by -1: hand back the dividend. */
    if (arg2 == -1 && arg1 == DF_MIN_INT(df)) {
        return DF_MIN_INT(df);
    }

    return arg1 / arg2;
}
2063
2064void helper_msa_div_s_b(CPUMIPSState *env,
2065                        uint32_t wd, uint32_t ws, uint32_t wt)
2066{
2067    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2068    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2069    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2070
2071    pwd->b[0]  = msa_div_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2072    pwd->b[1]  = msa_div_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2073    pwd->b[2]  = msa_div_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2074    pwd->b[3]  = msa_div_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2075    pwd->b[4]  = msa_div_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2076    pwd->b[5]  = msa_div_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2077    pwd->b[6]  = msa_div_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2078    pwd->b[7]  = msa_div_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2079    pwd->b[8]  = msa_div_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2080    pwd->b[9]  = msa_div_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2081    pwd->b[10] = msa_div_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2082    pwd->b[11] = msa_div_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2083    pwd->b[12] = msa_div_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2084    pwd->b[13] = msa_div_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2085    pwd->b[14] = msa_div_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2086    pwd->b[15] = msa_div_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2087}
2088
2089void helper_msa_div_s_h(CPUMIPSState *env,
2090                        uint32_t wd, uint32_t ws, uint32_t wt)
2091{
2092    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2093    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2094    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2095
2096    pwd->h[0]  = msa_div_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2097    pwd->h[1]  = msa_div_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2098    pwd->h[2]  = msa_div_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2099    pwd->h[3]  = msa_div_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2100    pwd->h[4]  = msa_div_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2101    pwd->h[5]  = msa_div_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2102    pwd->h[6]  = msa_div_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2103    pwd->h[7]  = msa_div_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2104}
2105
2106void helper_msa_div_s_w(CPUMIPSState *env,
2107                        uint32_t wd, uint32_t ws, uint32_t wt)
2108{
2109    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2110    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2111    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2112
2113    pwd->w[0]  = msa_div_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2114    pwd->w[1]  = msa_div_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2115    pwd->w[2]  = msa_div_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2116    pwd->w[3]  = msa_div_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2117}
2118
2119void helper_msa_div_s_d(CPUMIPSState *env,
2120                        uint32_t wd, uint32_t ws, uint32_t wt)
2121{
2122    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2123    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2124    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2125
2126    pwd->d[0]  = msa_div_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2127    pwd->d[1]  = msa_div_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2128}
2129
/*
 * Unsigned division of one element of data format 'df'.
 *
 * Both operands are first reduced to their low DF_BITS(df) bits with
 * UNSIGNED().  A zero divisor yields -1 (all bits set); otherwise the
 * unsigned quotient is returned.
 *
 * The zero test is made on the masked divisor, i.e. on the value that is
 * actually used for the division.  Testing the raw arg2 would let a value
 * whose only set bits lie above the element width reach the host division
 * as zero.  For arguments that are plain sign-extended elements the two
 * tests are equivalent.
 */
static inline int64_t msa_div_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = UNSIGNED(arg1, df);
    uint64_t u_arg2 = UNSIGNED(arg2, df);

    if (u_arg2 == 0) {
        return -1;
    }
    return u_arg1 / u_arg2;
}
2136
2137void helper_msa_div_u_b(CPUMIPSState *env,
2138                        uint32_t wd, uint32_t ws, uint32_t wt)
2139{
2140    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2141    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2142    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2143
2144    pwd->b[0]  = msa_div_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2145    pwd->b[1]  = msa_div_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2146    pwd->b[2]  = msa_div_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2147    pwd->b[3]  = msa_div_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2148    pwd->b[4]  = msa_div_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2149    pwd->b[5]  = msa_div_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2150    pwd->b[6]  = msa_div_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2151    pwd->b[7]  = msa_div_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2152    pwd->b[8]  = msa_div_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2153    pwd->b[9]  = msa_div_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2154    pwd->b[10] = msa_div_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2155    pwd->b[11] = msa_div_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2156    pwd->b[12] = msa_div_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2157    pwd->b[13] = msa_div_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2158    pwd->b[14] = msa_div_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2159    pwd->b[15] = msa_div_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2160}
2161
2162void helper_msa_div_u_h(CPUMIPSState *env,
2163                        uint32_t wd, uint32_t ws, uint32_t wt)
2164{
2165    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2166    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2167    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2168
2169    pwd->h[0]  = msa_div_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2170    pwd->h[1]  = msa_div_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2171    pwd->h[2]  = msa_div_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2172    pwd->h[3]  = msa_div_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2173    pwd->h[4]  = msa_div_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2174    pwd->h[5]  = msa_div_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2175    pwd->h[6]  = msa_div_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2176    pwd->h[7]  = msa_div_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2177}
2178
2179void helper_msa_div_u_w(CPUMIPSState *env,
2180                        uint32_t wd, uint32_t ws, uint32_t wt)
2181{
2182    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2183    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2184    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2185
2186    pwd->w[0]  = msa_div_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2187    pwd->w[1]  = msa_div_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2188    pwd->w[2]  = msa_div_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2189    pwd->w[3]  = msa_div_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2190}
2191
2192void helper_msa_div_u_d(CPUMIPSState *env,
2193                        uint32_t wd, uint32_t ws, uint32_t wt)
2194{
2195    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2196    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2197    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2198
2199    pwd->d[0]  = msa_div_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2200    pwd->d[1]  = msa_div_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2201}
2202
2203
2204/*
2205 * Int Dot Product
2206 * ---------------
2207 *
2208 * +---------------+----------------------------------------------------------+
2209 * | DOTP_S.H      | Vector Signed Dot Product (halfword)                     |
2210 * | DOTP_S.W      | Vector Signed Dot Product (word)                         |
2211 * | DOTP_S.D      | Vector Signed Dot Product (doubleword)                   |
2212 * | DOTP_U.H      | Vector Unsigned Dot Product (halfword)                   |
2213 * | DOTP_U.W      | Vector Unsigned Dot Product (word)                       |
2214 * | DOTP_U.D      | Vector Unsigned Dot Product (doubleword)                 |
 * | DPADD_S.H     | Vector Signed Dot Product and Add (halfword)             |
 * | DPADD_S.W     | Vector Signed Dot Product and Add (word)                 |
 * | DPADD_S.D     | Vector Signed Dot Product and Add (doubleword)           |
 * | DPADD_U.H     | Vector Unsigned Dot Product and Add (halfword)           |
 * | DPADD_U.W     | Vector Unsigned Dot Product and Add (word)               |
 * | DPADD_U.D     | Vector Unsigned Dot Product and Add (doubleword)         |
 * | DPSUB_S.H     | Vector Signed Dot Product and Subtract (halfword)        |
 * | DPSUB_S.W     | Vector Signed Dot Product and Subtract (word)            |
 * | DPSUB_S.D     | Vector Signed Dot Product and Subtract (doubleword)      |
 * | DPSUB_U.H     | Vector Unsigned Dot Product and Subtract (halfword)      |
 * | DPSUB_U.W     | Vector Unsigned Dot Product and Subtract (word)          |
 * | DPSUB_U.D     | Vector Unsigned Dot Product and Subtract (doubleword)    |
2227 * +---------------+----------------------------------------------------------+
2228 */
2229
/*
 * Store SIGNED_EVEN(a, df) in 'e' and SIGNED_ODD(a, df) in 'o'.
 * 'e' and 'o' must be assignable lvalues.  'a' is expanded more than once,
 * so it must be free of side effects.
 */
#define SIGNED_EXTRACT(e, o, a, df)             \
    do {                                        \
        (e) = SIGNED_EVEN((a), (df));           \
        (o) = SIGNED_ODD((a), (df));            \
    } while (0)
2235
/*
 * Store UNSIGNED_EVEN(a, df) in 'e' and UNSIGNED_ODD(a, df) in 'o'.
 * 'e' and 'o' must be assignable lvalues.  'a' is expanded more than once,
 * so it must be free of side effects.
 */
#define UNSIGNED_EXTRACT(e, o, a, df)           \
    do {                                        \
        (e) = UNSIGNED_EVEN((a), (df));         \
        (o) = UNSIGNED_ODD((a), (df));          \
    } while (0)
2241
2242
/*
 * Signed dot product of one destination element of data format 'df'.
 *
 * Each operand is split with SIGNED_EXTRACT() into an "even" and an "odd"
 * part (presumably the two half-width sub-elements; see SIGNED_EVEN and
 * SIGNED_ODD, defined elsewhere in this file).  The result is
 * even1 * even2 + odd1 * odd2.
 */
static inline int64_t msa_dotp_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t a_even, a_odd;
    int64_t b_even, b_odd;
    int64_t even_product;
    int64_t odd_product;

    SIGNED_EXTRACT(a_even, a_odd, arg1, df);
    SIGNED_EXTRACT(b_even, b_odd, arg2, df);

    even_product = a_even * b_even;
    odd_product = a_odd * b_odd;
    return even_product + odd_product;
}
2253
2254void helper_msa_dotp_s_h(CPUMIPSState *env,
2255                         uint32_t wd, uint32_t ws, uint32_t wt)
2256{
2257    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2258    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2259    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2260
2261    pwd->h[0]  = msa_dotp_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2262    pwd->h[1]  = msa_dotp_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2263    pwd->h[2]  = msa_dotp_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2264    pwd->h[3]  = msa_dotp_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2265    pwd->h[4]  = msa_dotp_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2266    pwd->h[5]  = msa_dotp_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2267    pwd->h[6]  = msa_dotp_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2268    pwd->h[7]  = msa_dotp_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2269}
2270
2271void helper_msa_dotp_s_w(CPUMIPSState *env,
2272                         uint32_t wd, uint32_t ws, uint32_t wt)
2273{
2274    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2275    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2276    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2277
2278    pwd->w[0]  = msa_dotp_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2279    pwd->w[1]  = msa_dotp_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2280    pwd->w[2]  = msa_dotp_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2281    pwd->w[3]  = msa_dotp_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2282}
2283
2284void helper_msa_dotp_s_d(CPUMIPSState *env,
2285                         uint32_t wd, uint32_t ws, uint32_t wt)
2286{
2287    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2288    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2289    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2290
2291    pwd->d[0]  = msa_dotp_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2292    pwd->d[1]  = msa_dotp_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2293}
2294
2295
/*
 * Unsigned dot product of one destination element of data format 'df'.
 *
 * Each operand is split with UNSIGNED_EXTRACT() into an "even" and an "odd"
 * part (presumably the two half-width sub-elements; see UNSIGNED_EVEN and
 * UNSIGNED_ODD, defined elsewhere in this file).  The parts are held in
 * int64_t variables and the result is even1 * even2 + odd1 * odd2.
 */
static inline int64_t msa_dotp_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t a_even, a_odd;
    int64_t b_even, b_odd;
    int64_t even_product;
    int64_t odd_product;

    UNSIGNED_EXTRACT(a_even, a_odd, arg1, df);
    UNSIGNED_EXTRACT(b_even, b_odd, arg2, df);

    even_product = a_even * b_even;
    odd_product = a_odd * b_odd;
    return even_product + odd_product;
}
2306
2307void helper_msa_dotp_u_h(CPUMIPSState *env,
2308                         uint32_t wd, uint32_t ws, uint32_t wt)
2309{
2310    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2311    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2312    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2313
2314    pwd->h[0]  = msa_dotp_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2315    pwd->h[1]  = msa_dotp_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2316    pwd->h[2]  = msa_dotp_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2317    pwd->h[3]  = msa_dotp_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2318    pwd->h[4]  = msa_dotp_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2319    pwd->h[5]  = msa_dotp_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2320    pwd->h[6]  = msa_dotp_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2321    pwd->h[7]  = msa_dotp_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2322}
2323
2324void helper_msa_dotp_u_w(CPUMIPSState *env,
2325                         uint32_t wd, uint32_t ws, uint32_t wt)
2326{
2327    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2328    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2329    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2330
2331    pwd->w[0]  = msa_dotp_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2332    pwd->w[1]  = msa_dotp_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2333    pwd->w[2]  = msa_dotp_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2334    pwd->w[3]  = msa_dotp_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2335}
2336
2337void helper_msa_dotp_u_d(CPUMIPSState *env,
2338                         uint32_t wd, uint32_t ws, uint32_t wt)
2339{
2340    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2341    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2342    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2343
2344    pwd->d[0]  = msa_dotp_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2345    pwd->d[1]  = msa_dotp_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2346}
2347
2348
/*
 * Signed dot product accumulated onto 'dest' for one element of data
 * format 'df'.
 *
 * arg1 and arg2 are split with SIGNED_EXTRACT() into "even" and "odd"
 * parts (see SIGNED_EVEN / SIGNED_ODD, defined elsewhere in this file);
 * the result is dest + even1 * even2 + odd1 * odd2, summed left to right.
 */
static inline int64_t msa_dpadd_s_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t a_even, a_odd;
    int64_t b_even, b_odd;
    int64_t even_product;
    int64_t odd_product;

    SIGNED_EXTRACT(a_even, a_odd, arg1, df);
    SIGNED_EXTRACT(b_even, b_odd, arg2, df);

    even_product = a_even * b_even;
    odd_product = a_odd * b_odd;
    return dest + even_product + odd_product;
}
2360
2361void helper_msa_dpadd_s_h(CPUMIPSState *env,
2362                          uint32_t wd, uint32_t ws, uint32_t wt)
2363{
2364    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2365    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2366    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2367
2368    pwd->h[0]  = msa_dpadd_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2369    pwd->h[1]  = msa_dpadd_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2370    pwd->h[2]  = msa_dpadd_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2371    pwd->h[3]  = msa_dpadd_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2372    pwd->h[4]  = msa_dpadd_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2373    pwd->h[5]  = msa_dpadd_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2374    pwd->h[6]  = msa_dpadd_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2375    pwd->h[7]  = msa_dpadd_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2376}
2377
2378void helper_msa_dpadd_s_w(CPUMIPSState *env,
2379                          uint32_t wd, uint32_t ws, uint32_t wt)
2380{
2381    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2382    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2383    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2384
2385    pwd->w[0]  = msa_dpadd_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2386    pwd->w[1]  = msa_dpadd_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2387    pwd->w[2]  = msa_dpadd_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2388    pwd->w[3]  = msa_dpadd_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2389}
2390
2391void helper_msa_dpadd_s_d(CPUMIPSState *env,
2392                          uint32_t wd, uint32_t ws, uint32_t wt)
2393{
2394    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2395    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2396    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2397
2398    pwd->d[0]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2399    pwd->d[1]  = msa_dpadd_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2400}
2401
2402
/*
 * Unsigned dot product accumulated onto 'dest' for one element of data
 * format 'df'.
 *
 * arg1 and arg2 are split with UNSIGNED_EXTRACT() into "even" and "odd"
 * parts (see UNSIGNED_EVEN / UNSIGNED_ODD, defined elsewhere in this file);
 * the result is dest + even1 * even2 + odd1 * odd2, summed left to right.
 */
static inline int64_t msa_dpadd_u_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t a_even, a_odd;
    int64_t b_even, b_odd;
    int64_t even_product;
    int64_t odd_product;

    UNSIGNED_EXTRACT(a_even, a_odd, arg1, df);
    UNSIGNED_EXTRACT(b_even, b_odd, arg2, df);

    even_product = a_even * b_even;
    odd_product = a_odd * b_odd;
    return dest + even_product + odd_product;
}
2414
2415void helper_msa_dpadd_u_h(CPUMIPSState *env,
2416                          uint32_t wd, uint32_t ws, uint32_t wt)
2417{
2418    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2419    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2420    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2421
2422    pwd->h[0]  = msa_dpadd_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2423    pwd->h[1]  = msa_dpadd_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2424    pwd->h[2]  = msa_dpadd_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2425    pwd->h[3]  = msa_dpadd_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2426    pwd->h[4]  = msa_dpadd_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2427    pwd->h[5]  = msa_dpadd_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2428    pwd->h[6]  = msa_dpadd_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2429    pwd->h[7]  = msa_dpadd_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2430}
2431
2432void helper_msa_dpadd_u_w(CPUMIPSState *env,
2433                          uint32_t wd, uint32_t ws, uint32_t wt)
2434{
2435    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2436    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2437    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2438
2439    pwd->w[0]  = msa_dpadd_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2440    pwd->w[1]  = msa_dpadd_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2441    pwd->w[2]  = msa_dpadd_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2442    pwd->w[3]  = msa_dpadd_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2443}
2444
2445void helper_msa_dpadd_u_d(CPUMIPSState *env,
2446                          uint32_t wd, uint32_t ws, uint32_t wt)
2447{
2448    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2449    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2450    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2451
2452    pwd->d[0]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2453    pwd->d[1]  = msa_dpadd_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2454}
2455
2456
/*
 * Signed dot product subtracted from 'dest' for one element of data
 * format 'df'.
 *
 * arg1 and arg2 are split with SIGNED_EXTRACT() into "even" and "odd"
 * parts (see SIGNED_EVEN / SIGNED_ODD, defined elsewhere in this file);
 * the result is dest - (even1 * even2 + odd1 * odd2).
 */
static inline int64_t msa_dpsub_s_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t a_even, a_odd;
    int64_t b_even, b_odd;
    int64_t dot_product;

    SIGNED_EXTRACT(a_even, a_odd, arg1, df);
    SIGNED_EXTRACT(b_even, b_odd, arg2, df);

    dot_product = (a_even * b_even) + (a_odd * b_odd);
    return dest - dot_product;
}
2468
2469void helper_msa_dpsub_s_h(CPUMIPSState *env,
2470                          uint32_t wd, uint32_t ws, uint32_t wt)
2471{
2472    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2473    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2474    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2475
2476    pwd->h[0]  = msa_dpsub_s_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2477    pwd->h[1]  = msa_dpsub_s_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2478    pwd->h[2]  = msa_dpsub_s_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2479    pwd->h[3]  = msa_dpsub_s_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2480    pwd->h[4]  = msa_dpsub_s_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2481    pwd->h[5]  = msa_dpsub_s_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2482    pwd->h[6]  = msa_dpsub_s_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2483    pwd->h[7]  = msa_dpsub_s_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2484}
2485
2486void helper_msa_dpsub_s_w(CPUMIPSState *env,
2487                          uint32_t wd, uint32_t ws, uint32_t wt)
2488{
2489    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2490    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2491    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2492
2493    pwd->w[0]  = msa_dpsub_s_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2494    pwd->w[1]  = msa_dpsub_s_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2495    pwd->w[2]  = msa_dpsub_s_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2496    pwd->w[3]  = msa_dpsub_s_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2497}
2498
2499void helper_msa_dpsub_s_d(CPUMIPSState *env,
2500                          uint32_t wd, uint32_t ws, uint32_t wt)
2501{
2502    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2503    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2504    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2505
2506    pwd->d[0]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2507    pwd->d[1]  = msa_dpsub_s_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2508}
2509
2510
/*
 * Unsigned dot product subtracted from 'dest' for one element of data
 * format 'df'.
 *
 * arg1 and arg2 are split with UNSIGNED_EXTRACT() into "even" and "odd"
 * parts (see UNSIGNED_EVEN / UNSIGNED_ODD, defined elsewhere in this file);
 * the result is dest - (even1 * even2 + odd1 * odd2).
 */
static inline int64_t msa_dpsub_u_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t a_even, a_odd;
    int64_t b_even, b_odd;
    int64_t dot_product;

    UNSIGNED_EXTRACT(a_even, a_odd, arg1, df);
    UNSIGNED_EXTRACT(b_even, b_odd, arg2, df);

    dot_product = (a_even * b_even) + (a_odd * b_odd);
    return dest - dot_product;
}
2522
2523void helper_msa_dpsub_u_h(CPUMIPSState *env,
2524                          uint32_t wd, uint32_t ws, uint32_t wt)
2525{
2526    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2527    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2528    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2529
2530    pwd->h[0]  = msa_dpsub_u_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
2531    pwd->h[1]  = msa_dpsub_u_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
2532    pwd->h[2]  = msa_dpsub_u_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
2533    pwd->h[3]  = msa_dpsub_u_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
2534    pwd->h[4]  = msa_dpsub_u_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
2535    pwd->h[5]  = msa_dpsub_u_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
2536    pwd->h[6]  = msa_dpsub_u_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
2537    pwd->h[7]  = msa_dpsub_u_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
2538}
2539
2540void helper_msa_dpsub_u_w(CPUMIPSState *env,
2541                          uint32_t wd, uint32_t ws, uint32_t wt)
2542{
2543    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2544    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2545    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2546
2547    pwd->w[0]  = msa_dpsub_u_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
2548    pwd->w[1]  = msa_dpsub_u_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
2549    pwd->w[2]  = msa_dpsub_u_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
2550    pwd->w[3]  = msa_dpsub_u_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
2551}
2552
2553void helper_msa_dpsub_u_d(CPUMIPSState *env,
2554                          uint32_t wd, uint32_t ws, uint32_t wt)
2555{
2556    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2557    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2558    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2559
2560    pwd->d[0]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
2561    pwd->d[1]  = msa_dpsub_u_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
2562}
2563
2564
2565/*
2566 * Int Max Min
2567 * -----------
2568 *
2569 * +---------------+----------------------------------------------------------+
2570 * | MAX_A.B       | Vector Maximum Based on Absolute Value (byte)            |
2571 * | MAX_A.H       | Vector Maximum Based on Absolute Value (halfword)        |
2572 * | MAX_A.W       | Vector Maximum Based on Absolute Value (word)            |
2573 * | MAX_A.D       | Vector Maximum Based on Absolute Value (doubleword)      |
2574 * | MAX_S.B       | Vector Signed Maximum (byte)                             |
2575 * | MAX_S.H       | Vector Signed Maximum (halfword)                         |
2576 * | MAX_S.W       | Vector Signed Maximum (word)                             |
2577 * | MAX_S.D       | Vector Signed Maximum (doubleword)                       |
2578 * | MAX_U.B       | Vector Unsigned Maximum (byte)                           |
2579 * | MAX_U.H       | Vector Unsigned Maximum (halfword)                       |
2580 * | MAX_U.W       | Vector Unsigned Maximum (word)                           |
2581 * | MAX_U.D       | Vector Unsigned Maximum (doubleword)                     |
2582 * | MIN_A.B       | Vector Minimum Based on Absolute Value (byte)            |
2583 * | MIN_A.H       | Vector Minimum Based on Absolute Value (halfword)        |
2584 * | MIN_A.W       | Vector Minimum Based on Absolute Value (word)            |
2585 * | MIN_A.D       | Vector Minimum Based on Absolute Value (doubleword)      |
2586 * | MIN_S.B       | Vector Signed Minimum (byte)                             |
2587 * | MIN_S.H       | Vector Signed Minimum (halfword)                         |
2588 * | MIN_S.W       | Vector Signed Minimum (word)                             |
2589 * | MIN_S.D       | Vector Signed Minimum (doubleword)                       |
2590 * | MIN_U.B       | Vector Unsigned Minimum (byte)                           |
2591 * | MIN_U.H       | Vector Unsigned Minimum (halfword)                       |
2592 * | MIN_U.W       | Vector Unsigned Minimum (word)                           |
2593 * | MIN_U.D       | Vector Unsigned Minimum (doubleword)                     |
2594 * +---------------+----------------------------------------------------------+
2595 */
2596
/*
 * Maximum based on absolute value: returns whichever of arg1 / arg2 has the
 * larger magnitude, with its original sign.  On equal magnitudes arg2 is
 * returned.  'df' is not used by this function.
 *
 * The magnitudes are computed in uint64_t.  Negating a uint64_t wraps
 * modulo 2^64, so the magnitude of INT64_MIN comes out as 2^63 without
 * relying on signed-overflow wrapping (which ISO C leaves undefined and
 * which would otherwise require building with -fwrapv).
 */
static inline int64_t msa_max_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;

    return abs_arg1 > abs_arg2 ? arg1 : arg2;
}
2603
2604void helper_msa_max_a_b(CPUMIPSState *env,
2605                        uint32_t wd, uint32_t ws, uint32_t wt)
2606{
2607    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2608    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2609    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2610
2611    pwd->b[0]  = msa_max_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2612    pwd->b[1]  = msa_max_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2613    pwd->b[2]  = msa_max_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2614    pwd->b[3]  = msa_max_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2615    pwd->b[4]  = msa_max_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2616    pwd->b[5]  = msa_max_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2617    pwd->b[6]  = msa_max_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2618    pwd->b[7]  = msa_max_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2619    pwd->b[8]  = msa_max_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2620    pwd->b[9]  = msa_max_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2621    pwd->b[10] = msa_max_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
2622    pwd->b[11] = msa_max_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
2623    pwd->b[12] = msa_max_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
2624    pwd->b[13] = msa_max_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
2625    pwd->b[14] = msa_max_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
2626    pwd->b[15] = msa_max_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
2627}
2628
2629void helper_msa_max_a_h(CPUMIPSState *env,
2630                        uint32_t wd, uint32_t ws, uint32_t wt)
2631{
2632    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2633    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2634    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2635
2636    pwd->h[0]  = msa_max_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
2637    pwd->h[1]  = msa_max_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
2638    pwd->h[2]  = msa_max_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
2639    pwd->h[3]  = msa_max_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
2640    pwd->h[4]  = msa_max_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
2641    pwd->h[5]  = msa_max_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
2642    pwd->h[6]  = msa_max_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
2643    pwd->h[7]  = msa_max_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
2644}
2645
2646void helper_msa_max_a_w(CPUMIPSState *env,
2647                        uint32_t wd, uint32_t ws, uint32_t wt)
2648{
2649    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2650    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2651    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2652
2653    pwd->w[0]  = msa_max_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
2654    pwd->w[1]  = msa_max_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
2655    pwd->w[2]  = msa_max_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
2656    pwd->w[3]  = msa_max_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
2657}
2658
2659void helper_msa_max_a_d(CPUMIPSState *env,
2660                        uint32_t wd, uint32_t ws, uint32_t wt)
2661{
2662    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2663    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2664    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2665
2666    pwd->d[0]  = msa_max_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2667    pwd->d[1]  = msa_max_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2668}
2669
2670
/*
 * Signed maximum of arg1 and arg2.  'df' is not used by this function.
 */
static inline int64_t msa_max_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 > arg2) {
        return arg1;
    }
    return arg2;
}
2675
2676void helper_msa_max_s_b(CPUMIPSState *env,
2677                        uint32_t wd, uint32_t ws, uint32_t wt)
2678{
2679    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2680    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2681    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2682
2683    pwd->b[0]  = msa_max_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2684    pwd->b[1]  = msa_max_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2685    pwd->b[2]  = msa_max_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2686    pwd->b[3]  = msa_max_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2687    pwd->b[4]  = msa_max_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2688    pwd->b[5]  = msa_max_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2689    pwd->b[6]  = msa_max_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2690    pwd->b[7]  = msa_max_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2691    pwd->b[8]  = msa_max_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2692    pwd->b[9]  = msa_max_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2693    pwd->b[10] = msa_max_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2694    pwd->b[11] = msa_max_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2695    pwd->b[12] = msa_max_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2696    pwd->b[13] = msa_max_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2697    pwd->b[14] = msa_max_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2698    pwd->b[15] = msa_max_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2699}
2700
2701void helper_msa_max_s_h(CPUMIPSState *env,
2702                        uint32_t wd, uint32_t ws, uint32_t wt)
2703{
2704    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2705    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2706    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2707
2708    pwd->h[0]  = msa_max_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2709    pwd->h[1]  = msa_max_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2710    pwd->h[2]  = msa_max_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2711    pwd->h[3]  = msa_max_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2712    pwd->h[4]  = msa_max_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2713    pwd->h[5]  = msa_max_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2714    pwd->h[6]  = msa_max_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2715    pwd->h[7]  = msa_max_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2716}
2717
2718void helper_msa_max_s_w(CPUMIPSState *env,
2719                        uint32_t wd, uint32_t ws, uint32_t wt)
2720{
2721    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2722    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2723    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2724
2725    pwd->w[0]  = msa_max_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2726    pwd->w[1]  = msa_max_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2727    pwd->w[2]  = msa_max_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2728    pwd->w[3]  = msa_max_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2729}
2730
2731void helper_msa_max_s_d(CPUMIPSState *env,
2732                        uint32_t wd, uint32_t ws, uint32_t wt)
2733{
2734    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2735    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2736    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2737
2738    pwd->d[0]  = msa_max_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2739    pwd->d[1]  = msa_max_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2740}
2741
2742
/*
 * Unsigned maximum of one element of data format 'df'.
 *
 * The operands are compared after reduction to their low DF_BITS(df) bits
 * with UNSIGNED(); the value returned is the original (unmasked) argument
 * that won the comparison, with arg2 returned when they compare equal.
 */
static inline int64_t msa_max_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs > rhs) {
        return arg1;
    }
    return arg2;
}
2749
2750void helper_msa_max_u_b(CPUMIPSState *env,
2751                        uint32_t wd, uint32_t ws, uint32_t wt)
2752{
2753    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2754    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2755    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2756
2757    pwd->b[0]  = msa_max_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2758    pwd->b[1]  = msa_max_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2759    pwd->b[2]  = msa_max_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2760    pwd->b[3]  = msa_max_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2761    pwd->b[4]  = msa_max_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2762    pwd->b[5]  = msa_max_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2763    pwd->b[6]  = msa_max_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2764    pwd->b[7]  = msa_max_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2765    pwd->b[8]  = msa_max_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2766    pwd->b[9]  = msa_max_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2767    pwd->b[10] = msa_max_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2768    pwd->b[11] = msa_max_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2769    pwd->b[12] = msa_max_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2770    pwd->b[13] = msa_max_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2771    pwd->b[14] = msa_max_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2772    pwd->b[15] = msa_max_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2773}
2774
2775void helper_msa_max_u_h(CPUMIPSState *env,
2776                        uint32_t wd, uint32_t ws, uint32_t wt)
2777{
2778    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2779    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2780    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2781
2782    pwd->h[0]  = msa_max_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
2783    pwd->h[1]  = msa_max_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
2784    pwd->h[2]  = msa_max_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
2785    pwd->h[3]  = msa_max_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
2786    pwd->h[4]  = msa_max_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
2787    pwd->h[5]  = msa_max_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
2788    pwd->h[6]  = msa_max_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
2789    pwd->h[7]  = msa_max_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
2790}
2791
2792void helper_msa_max_u_w(CPUMIPSState *env,
2793                        uint32_t wd, uint32_t ws, uint32_t wt)
2794{
2795    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2796    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2797    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2798
2799    pwd->w[0]  = msa_max_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
2800    pwd->w[1]  = msa_max_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
2801    pwd->w[2]  = msa_max_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
2802    pwd->w[3]  = msa_max_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
2803}
2804
2805void helper_msa_max_u_d(CPUMIPSState *env,
2806                        uint32_t wd, uint32_t ws, uint32_t wt)
2807{
2808    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2809    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2810    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2811
2812    pwd->d[0]  = msa_max_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2813    pwd->d[1]  = msa_max_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2814}
2815
2816
/*
 * Return whichever argument has the smaller absolute value.
 *
 * The magnitudes are formed in uint64_t rather than int64_t: negating
 * INT64_MIN in signed arithmetic overflows, whereas the unsigned negation
 * is well defined and yields 2^63, which compares larger than every
 * other magnitude.  When both magnitudes are equal, arg2 is returned.
 * df is unused.
 */
static inline int64_t msa_min_a_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
    uint64_t abs_arg2 = arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2;

    return abs_arg1 < abs_arg2 ? arg1 : arg2;
}
2823
2824void helper_msa_min_a_b(CPUMIPSState *env,
2825                        uint32_t wd, uint32_t ws, uint32_t wt)
2826{
2827    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2828    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2829    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2830
2831    pwd->b[0]  = msa_min_a_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2832    pwd->b[1]  = msa_min_a_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2833    pwd->b[2]  = msa_min_a_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2834    pwd->b[3]  = msa_min_a_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2835    pwd->b[4]  = msa_min_a_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2836    pwd->b[5]  = msa_min_a_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2837    pwd->b[6]  = msa_min_a_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2838    pwd->b[7]  = msa_min_a_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2839    pwd->b[8]  = msa_min_a_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2840    pwd->b[9]  = msa_min_a_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2841    pwd->b[10] = msa_min_a_df(DF_BYTE, pws->b[10], pwt->b[10]);
2842    pwd->b[11] = msa_min_a_df(DF_BYTE, pws->b[11], pwt->b[11]);
2843    pwd->b[12] = msa_min_a_df(DF_BYTE, pws->b[12], pwt->b[12]);
2844    pwd->b[13] = msa_min_a_df(DF_BYTE, pws->b[13], pwt->b[13]);
2845    pwd->b[14] = msa_min_a_df(DF_BYTE, pws->b[14], pwt->b[14]);
2846    pwd->b[15] = msa_min_a_df(DF_BYTE, pws->b[15], pwt->b[15]);
2847}
2848
2849void helper_msa_min_a_h(CPUMIPSState *env,
2850                        uint32_t wd, uint32_t ws, uint32_t wt)
2851{
2852    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2853    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2854    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2855
2856    pwd->h[0]  = msa_min_a_df(DF_HALF, pws->h[0],  pwt->h[0]);
2857    pwd->h[1]  = msa_min_a_df(DF_HALF, pws->h[1],  pwt->h[1]);
2858    pwd->h[2]  = msa_min_a_df(DF_HALF, pws->h[2],  pwt->h[2]);
2859    pwd->h[3]  = msa_min_a_df(DF_HALF, pws->h[3],  pwt->h[3]);
2860    pwd->h[4]  = msa_min_a_df(DF_HALF, pws->h[4],  pwt->h[4]);
2861    pwd->h[5]  = msa_min_a_df(DF_HALF, pws->h[5],  pwt->h[5]);
2862    pwd->h[6]  = msa_min_a_df(DF_HALF, pws->h[6],  pwt->h[6]);
2863    pwd->h[7]  = msa_min_a_df(DF_HALF, pws->h[7],  pwt->h[7]);
2864}
2865
2866void helper_msa_min_a_w(CPUMIPSState *env,
2867                        uint32_t wd, uint32_t ws, uint32_t wt)
2868{
2869    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2870    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2871    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2872
2873    pwd->w[0]  = msa_min_a_df(DF_WORD, pws->w[0],  pwt->w[0]);
2874    pwd->w[1]  = msa_min_a_df(DF_WORD, pws->w[1],  pwt->w[1]);
2875    pwd->w[2]  = msa_min_a_df(DF_WORD, pws->w[2],  pwt->w[2]);
2876    pwd->w[3]  = msa_min_a_df(DF_WORD, pws->w[3],  pwt->w[3]);
2877}
2878
2879void helper_msa_min_a_d(CPUMIPSState *env,
2880                        uint32_t wd, uint32_t ws, uint32_t wt)
2881{
2882    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2883    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2884    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2885
2886    pwd->d[0]  = msa_min_a_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2887    pwd->d[1]  = msa_min_a_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2888}
2889
2890
/*
 * Signed minimum of two elements.  Returns arg1 only when it is strictly
 * less than arg2; otherwise arg2.  df is unused.
 */
static inline int64_t msa_min_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg1 < arg2) {
        return arg1;
    }
    return arg2;
}
2895
2896void helper_msa_min_s_b(CPUMIPSState *env,
2897                        uint32_t wd, uint32_t ws, uint32_t wt)
2898{
2899    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2900    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2901    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2902
2903    pwd->b[0]  = msa_min_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2904    pwd->b[1]  = msa_min_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2905    pwd->b[2]  = msa_min_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2906    pwd->b[3]  = msa_min_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2907    pwd->b[4]  = msa_min_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2908    pwd->b[5]  = msa_min_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2909    pwd->b[6]  = msa_min_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2910    pwd->b[7]  = msa_min_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2911    pwd->b[8]  = msa_min_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2912    pwd->b[9]  = msa_min_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2913    pwd->b[10] = msa_min_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
2914    pwd->b[11] = msa_min_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
2915    pwd->b[12] = msa_min_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
2916    pwd->b[13] = msa_min_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
2917    pwd->b[14] = msa_min_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
2918    pwd->b[15] = msa_min_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
2919}
2920
2921void helper_msa_min_s_h(CPUMIPSState *env,
2922                        uint32_t wd, uint32_t ws, uint32_t wt)
2923{
2924    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2925    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2926    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2927
2928    pwd->h[0]  = msa_min_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
2929    pwd->h[1]  = msa_min_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
2930    pwd->h[2]  = msa_min_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
2931    pwd->h[3]  = msa_min_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
2932    pwd->h[4]  = msa_min_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
2933    pwd->h[5]  = msa_min_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
2934    pwd->h[6]  = msa_min_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
2935    pwd->h[7]  = msa_min_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
2936}
2937
2938void helper_msa_min_s_w(CPUMIPSState *env,
2939                        uint32_t wd, uint32_t ws, uint32_t wt)
2940{
2941    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2942    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2943    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2944
2945    pwd->w[0]  = msa_min_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
2946    pwd->w[1]  = msa_min_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
2947    pwd->w[2]  = msa_min_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
2948    pwd->w[3]  = msa_min_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
2949}
2950
2951void helper_msa_min_s_d(CPUMIPSState *env,
2952                        uint32_t wd, uint32_t ws, uint32_t wt)
2953{
2954    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2955    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2956    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2957
2958    pwd->d[0]  = msa_min_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
2959    pwd->d[1]  = msa_min_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
2960}
2961
2962
/*
 * Pick the smaller of two elements under an unsigned comparison.
 *
 * Both operands are reduced with UNSIGNED(x, df) before being compared;
 * the value handed back is the original, unreduced argument.  When the
 * reduced values are equal, arg2 is returned.
 */
static inline int64_t msa_min_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    if (lhs < rhs) {
        return arg1;
    }
    return arg2;
}
2969
2970void helper_msa_min_u_b(CPUMIPSState *env,
2971                        uint32_t wd, uint32_t ws, uint32_t wt)
2972{
2973    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2974    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
2975    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
2976
2977    pwd->b[0]  = msa_min_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
2978    pwd->b[1]  = msa_min_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
2979    pwd->b[2]  = msa_min_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
2980    pwd->b[3]  = msa_min_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
2981    pwd->b[4]  = msa_min_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
2982    pwd->b[5]  = msa_min_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
2983    pwd->b[6]  = msa_min_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
2984    pwd->b[7]  = msa_min_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
2985    pwd->b[8]  = msa_min_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
2986    pwd->b[9]  = msa_min_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
2987    pwd->b[10] = msa_min_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
2988    pwd->b[11] = msa_min_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
2989    pwd->b[12] = msa_min_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
2990    pwd->b[13] = msa_min_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
2991    pwd->b[14] = msa_min_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
2992    pwd->b[15] = msa_min_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
2993}
2994
2995void helper_msa_min_u_h(CPUMIPSState *env,
2996                        uint32_t wd, uint32_t ws, uint32_t wt)
2997{
2998    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
2999    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3000    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3001
3002    pwd->h[0]  = msa_min_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3003    pwd->h[1]  = msa_min_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3004    pwd->h[2]  = msa_min_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3005    pwd->h[3]  = msa_min_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3006    pwd->h[4]  = msa_min_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3007    pwd->h[5]  = msa_min_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3008    pwd->h[6]  = msa_min_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3009    pwd->h[7]  = msa_min_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3010}
3011
3012void helper_msa_min_u_w(CPUMIPSState *env,
3013                        uint32_t wd, uint32_t ws, uint32_t wt)
3014{
3015    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3016    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3017    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3018
3019    pwd->w[0]  = msa_min_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3020    pwd->w[1]  = msa_min_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3021    pwd->w[2]  = msa_min_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3022    pwd->w[3]  = msa_min_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3023}
3024
3025void helper_msa_min_u_d(CPUMIPSState *env,
3026                        uint32_t wd, uint32_t ws, uint32_t wt)
3027{
3028    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3029    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3030    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3031
3032    pwd->d[0]  = msa_min_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3033    pwd->d[1]  = msa_min_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3034}
3035
3036
3037/*
3038 * Int Modulo
3039 * ----------
3040 *
3041 * +---------------+----------------------------------------------------------+
3042 * | MOD_S.B       | Vector Signed Modulo (byte)                              |
3043 * | MOD_S.H       | Vector Signed Modulo (halfword)                          |
3044 * | MOD_S.W       | Vector Signed Modulo (word)                              |
3045 * | MOD_S.D       | Vector Signed Modulo (doubleword)                        |
3046 * | MOD_U.B       | Vector Unsigned Modulo (byte)                            |
3047 * | MOD_U.H       | Vector Unsigned Modulo (halfword)                        |
3048 * | MOD_U.W       | Vector Unsigned Modulo (word)                            |
3049 * | MOD_U.D       | Vector Unsigned Modulo (doubleword)                      |
3050 * +---------------+----------------------------------------------------------+
3051 */
3052
/*
 * Signed remainder of arg1 divided by arg2 for one vector element.
 *
 * A zero divisor returns arg1 unchanged.  A divisor of -1 returns 0
 * without evaluating the % operator: the remainder of any value divided
 * by -1 is 0, and skipping the operator avoids the overflowing
 * INT64_MIN % -1 case.  Testing only the divisor gives the same result as
 * comparing arg1 against DF_MIN_INT(df) first, but does not need that
 * macro, which for 64-bit elements shifts 1LL left by 63.  df is unused.
 */
static inline int64_t msa_mod_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    if (arg2 == 0) {
        return arg1;
    }
    if (arg2 == -1) {
        return 0;
    }
    return arg1 % arg2;
}
3060
3061void helper_msa_mod_s_b(CPUMIPSState *env,
3062                        uint32_t wd, uint32_t ws, uint32_t wt)
3063{
3064    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3065    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3066    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3067
3068    pwd->b[0]  = msa_mod_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3069    pwd->b[1]  = msa_mod_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3070    pwd->b[2]  = msa_mod_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3071    pwd->b[3]  = msa_mod_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3072    pwd->b[4]  = msa_mod_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3073    pwd->b[5]  = msa_mod_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3074    pwd->b[6]  = msa_mod_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3075    pwd->b[7]  = msa_mod_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3076    pwd->b[8]  = msa_mod_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3077    pwd->b[9]  = msa_mod_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3078    pwd->b[10] = msa_mod_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3079    pwd->b[11] = msa_mod_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3080    pwd->b[12] = msa_mod_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3081    pwd->b[13] = msa_mod_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3082    pwd->b[14] = msa_mod_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3083    pwd->b[15] = msa_mod_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3084}
3085
3086void helper_msa_mod_s_h(CPUMIPSState *env,
3087                        uint32_t wd, uint32_t ws, uint32_t wt)
3088{
3089    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3090    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3091    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3092
3093    pwd->h[0]  = msa_mod_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3094    pwd->h[1]  = msa_mod_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3095    pwd->h[2]  = msa_mod_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3096    pwd->h[3]  = msa_mod_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3097    pwd->h[4]  = msa_mod_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3098    pwd->h[5]  = msa_mod_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3099    pwd->h[6]  = msa_mod_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3100    pwd->h[7]  = msa_mod_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3101}
3102
3103void helper_msa_mod_s_w(CPUMIPSState *env,
3104                        uint32_t wd, uint32_t ws, uint32_t wt)
3105{
3106    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3107    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3108    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3109
3110    pwd->w[0]  = msa_mod_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3111    pwd->w[1]  = msa_mod_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3112    pwd->w[2]  = msa_mod_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3113    pwd->w[3]  = msa_mod_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3114}
3115
3116void helper_msa_mod_s_d(CPUMIPSState *env,
3117                        uint32_t wd, uint32_t ws, uint32_t wt)
3118{
3119    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3120    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3121    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3122
3123    pwd->d[0]  = msa_mod_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3124    pwd->d[1]  = msa_mod_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3125}
3126
/*
 * Unsigned remainder for one vector element.
 *
 * Both operands are first reduced with UNSIGNED(x, df).  If the reduced
 * divisor is zero the reduced dividend is returned as is; otherwise the
 * result is the reduced dividend modulo the reduced divisor.
 */
static inline int64_t msa_mod_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t dividend = UNSIGNED(arg1, df);
    const uint64_t divisor = UNSIGNED(arg2, df);

    if (divisor == 0) {
        return dividend;
    }
    return dividend % divisor;
}
3133
3134void helper_msa_mod_u_b(CPUMIPSState *env,
3135                        uint32_t wd, uint32_t ws, uint32_t wt)
3136{
3137    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3138    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3139    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3140
3141    pwd->b[0]  = msa_mod_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3142    pwd->b[1]  = msa_mod_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3143    pwd->b[2]  = msa_mod_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3144    pwd->b[3]  = msa_mod_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3145    pwd->b[4]  = msa_mod_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3146    pwd->b[5]  = msa_mod_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3147    pwd->b[6]  = msa_mod_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3148    pwd->b[7]  = msa_mod_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3149    pwd->b[8]  = msa_mod_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3150    pwd->b[9]  = msa_mod_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3151    pwd->b[10] = msa_mod_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3152    pwd->b[11] = msa_mod_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3153    pwd->b[12] = msa_mod_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3154    pwd->b[13] = msa_mod_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3155    pwd->b[14] = msa_mod_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3156    pwd->b[15] = msa_mod_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3157}
3158
3159void helper_msa_mod_u_h(CPUMIPSState *env,
3160                        uint32_t wd, uint32_t ws, uint32_t wt)
3161{
3162    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3163    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3164    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3165
3166    pwd->h[0]  = msa_mod_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3167    pwd->h[1]  = msa_mod_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3168    pwd->h[2]  = msa_mod_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3169    pwd->h[3]  = msa_mod_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3170    pwd->h[4]  = msa_mod_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3171    pwd->h[5]  = msa_mod_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3172    pwd->h[6]  = msa_mod_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3173    pwd->h[7]  = msa_mod_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3174}
3175
3176void helper_msa_mod_u_w(CPUMIPSState *env,
3177                        uint32_t wd, uint32_t ws, uint32_t wt)
3178{
3179    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3180    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3181    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3182
3183    pwd->w[0]  = msa_mod_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3184    pwd->w[1]  = msa_mod_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3185    pwd->w[2]  = msa_mod_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3186    pwd->w[3]  = msa_mod_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3187}
3188
3189void helper_msa_mod_u_d(CPUMIPSState *env,
3190                        uint32_t wd, uint32_t ws, uint32_t wt)
3191{
3192    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3193    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3194    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3195
3196    pwd->d[0]  = msa_mod_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3197    pwd->d[1]  = msa_mod_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3198}
3199
3200
3201/*
3202 * Int Multiply
3203 * ------------
3204 *
3205 * +---------------+----------------------------------------------------------+
3206 * | MADDV.B       | Vector Multiply and Add (byte)                           |
3207 * | MADDV.H       | Vector Multiply and Add (halfword)                       |
3208 * | MADDV.W       | Vector Multiply and Add (word)                           |
3209 * | MADDV.D       | Vector Multiply and Add (doubleword)                     |
3210 * | MSUBV.B       | Vector Multiply and Subtract (byte)                      |
3211 * | MSUBV.H       | Vector Multiply and Subtract (halfword)                  |
3212 * | MSUBV.W       | Vector Multiply and Subtract (word)                      |
3213 * | MSUBV.D       | Vector Multiply and Subtract (doubleword)                |
3214 * | MULV.B        | Vector Multiply (byte)                                   |
3215 * | MULV.H        | Vector Multiply (halfword)                               |
3216 * | MULV.W        | Vector Multiply (word)                                   |
3217 * | MULV.D        | Vector Multiply (doubleword)                             |
3218 * +---------------+----------------------------------------------------------+
3219 */
3220
/*
 * Multiply-and-add for one vector element: dest + arg1 * arg2.
 *
 * The arithmetic is carried out in uint64_t so that a product or sum that
 * does not fit in 64 bits wraps modulo 2^64 instead of overflowing a
 * signed type.  The low bits, which are all a caller keeps when storing
 * into a narrower element, are the same either way.  df is unused.
 */
static inline int64_t msa_maddv_df(uint32_t df, int64_t dest, int64_t arg1,
                                   int64_t arg2)
{
    return (int64_t)((uint64_t)dest + (uint64_t)arg1 * (uint64_t)arg2);
}
3226
3227void helper_msa_maddv_b(CPUMIPSState *env,
3228                        uint32_t wd, uint32_t ws, uint32_t wt)
3229{
3230    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3231    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3232    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3233
3234    pwd->b[0]  = msa_maddv_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
3235    pwd->b[1]  = msa_maddv_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
3236    pwd->b[2]  = msa_maddv_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
3237    pwd->b[3]  = msa_maddv_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
3238    pwd->b[4]  = msa_maddv_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
3239    pwd->b[5]  = msa_maddv_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
3240    pwd->b[6]  = msa_maddv_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
3241    pwd->b[7]  = msa_maddv_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
3242    pwd->b[8]  = msa_maddv_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
3243    pwd->b[9]  = msa_maddv_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
3244    pwd->b[10] = msa_maddv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
3245    pwd->b[11] = msa_maddv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
3246    pwd->b[12] = msa_maddv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
3247    pwd->b[13] = msa_maddv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
3248    pwd->b[14] = msa_maddv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
3249    pwd->b[15] = msa_maddv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
3250}
3251
3252void helper_msa_maddv_h(CPUMIPSState *env,
3253                        uint32_t wd, uint32_t ws, uint32_t wt)
3254{
3255    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3256    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3257    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3258
3259    pwd->h[0]  = msa_maddv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
3260    pwd->h[1]  = msa_maddv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
3261    pwd->h[2]  = msa_maddv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
3262    pwd->h[3]  = msa_maddv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
3263    pwd->h[4]  = msa_maddv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
3264    pwd->h[5]  = msa_maddv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
3265    pwd->h[6]  = msa_maddv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
3266    pwd->h[7]  = msa_maddv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
3267}
3268
3269void helper_msa_maddv_w(CPUMIPSState *env,
3270                        uint32_t wd, uint32_t ws, uint32_t wt)
3271{
3272    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3273    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3274    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3275
3276    pwd->w[0]  = msa_maddv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
3277    pwd->w[1]  = msa_maddv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
3278    pwd->w[2]  = msa_maddv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
3279    pwd->w[3]  = msa_maddv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
3280}
3281
3282void helper_msa_maddv_d(CPUMIPSState *env,
3283                        uint32_t wd, uint32_t ws, uint32_t wt)
3284{
3285    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3286    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3287    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3288
3289    pwd->d[0]  = msa_maddv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
3290    pwd->d[1]  = msa_maddv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
3291}
3292
/*
 * Multiply-and-subtract for one vector element: dest - arg1 * arg2.
 *
 * The arithmetic is carried out in uint64_t so that a product or
 * difference that does not fit in 64 bits wraps modulo 2^64 instead of
 * overflowing a signed type.  The low bits, which are all a caller keeps
 * when storing into a narrower element, are the same either way.
 * df is unused.
 */
static inline int64_t msa_msubv_df(uint32_t df, int64_t dest, int64_t arg1,
                                   int64_t arg2)
{
    return (int64_t)((uint64_t)dest - (uint64_t)arg1 * (uint64_t)arg2);
}
3298
3299void helper_msa_msubv_b(CPUMIPSState *env,
3300                        uint32_t wd, uint32_t ws, uint32_t wt)
3301{
3302    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3303    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3304    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3305
3306    pwd->b[0]  = msa_msubv_df(DF_BYTE, pwd->b[0],  pws->b[0],  pwt->b[0]);
3307    pwd->b[1]  = msa_msubv_df(DF_BYTE, pwd->b[1],  pws->b[1],  pwt->b[1]);
3308    pwd->b[2]  = msa_msubv_df(DF_BYTE, pwd->b[2],  pws->b[2],  pwt->b[2]);
3309    pwd->b[3]  = msa_msubv_df(DF_BYTE, pwd->b[3],  pws->b[3],  pwt->b[3]);
3310    pwd->b[4]  = msa_msubv_df(DF_BYTE, pwd->b[4],  pws->b[4],  pwt->b[4]);
3311    pwd->b[5]  = msa_msubv_df(DF_BYTE, pwd->b[5],  pws->b[5],  pwt->b[5]);
3312    pwd->b[6]  = msa_msubv_df(DF_BYTE, pwd->b[6],  pws->b[6],  pwt->b[6]);
3313    pwd->b[7]  = msa_msubv_df(DF_BYTE, pwd->b[7],  pws->b[7],  pwt->b[7]);
3314    pwd->b[8]  = msa_msubv_df(DF_BYTE, pwd->b[8],  pws->b[8],  pwt->b[8]);
3315    pwd->b[9]  = msa_msubv_df(DF_BYTE, pwd->b[9],  pws->b[9],  pwt->b[9]);
3316    pwd->b[10] = msa_msubv_df(DF_BYTE, pwd->b[10], pws->b[10], pwt->b[10]);
3317    pwd->b[11] = msa_msubv_df(DF_BYTE, pwd->b[11], pws->b[11], pwt->b[11]);
3318    pwd->b[12] = msa_msubv_df(DF_BYTE, pwd->b[12], pws->b[12], pwt->b[12]);
3319    pwd->b[13] = msa_msubv_df(DF_BYTE, pwd->b[13], pws->b[13], pwt->b[13]);
3320    pwd->b[14] = msa_msubv_df(DF_BYTE, pwd->b[14], pws->b[14], pwt->b[14]);
3321    pwd->b[15] = msa_msubv_df(DF_BYTE, pwd->b[15], pws->b[15], pwt->b[15]);
3322}
3323
3324void helper_msa_msubv_h(CPUMIPSState *env,
3325                        uint32_t wd, uint32_t ws, uint32_t wt)
3326{
3327    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3328    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3329    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3330
3331    pwd->h[0]  = msa_msubv_df(DF_HALF, pwd->h[0],  pws->h[0],  pwt->h[0]);
3332    pwd->h[1]  = msa_msubv_df(DF_HALF, pwd->h[1],  pws->h[1],  pwt->h[1]);
3333    pwd->h[2]  = msa_msubv_df(DF_HALF, pwd->h[2],  pws->h[2],  pwt->h[2]);
3334    pwd->h[3]  = msa_msubv_df(DF_HALF, pwd->h[3],  pws->h[3],  pwt->h[3]);
3335    pwd->h[4]  = msa_msubv_df(DF_HALF, pwd->h[4],  pws->h[4],  pwt->h[4]);
3336    pwd->h[5]  = msa_msubv_df(DF_HALF, pwd->h[5],  pws->h[5],  pwt->h[5]);
3337    pwd->h[6]  = msa_msubv_df(DF_HALF, pwd->h[6],  pws->h[6],  pwt->h[6]);
3338    pwd->h[7]  = msa_msubv_df(DF_HALF, pwd->h[7],  pws->h[7],  pwt->h[7]);
3339}
3340
3341void helper_msa_msubv_w(CPUMIPSState *env,
3342                        uint32_t wd, uint32_t ws, uint32_t wt)
3343{
3344    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3345    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3346    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3347
3348    pwd->w[0]  = msa_msubv_df(DF_WORD, pwd->w[0],  pws->w[0],  pwt->w[0]);
3349    pwd->w[1]  = msa_msubv_df(DF_WORD, pwd->w[1],  pws->w[1],  pwt->w[1]);
3350    pwd->w[2]  = msa_msubv_df(DF_WORD, pwd->w[2],  pws->w[2],  pwt->w[2]);
3351    pwd->w[3]  = msa_msubv_df(DF_WORD, pwd->w[3],  pws->w[3],  pwt->w[3]);
3352}
3353
3354void helper_msa_msubv_d(CPUMIPSState *env,
3355                        uint32_t wd, uint32_t ws, uint32_t wt)
3356{
3357    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3358    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3359    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3360
3361    pwd->d[0]  = msa_msubv_df(DF_DOUBLE, pwd->d[0],  pws->d[0],  pwt->d[0]);
3362    pwd->d[1]  = msa_msubv_df(DF_DOUBLE, pwd->d[1],  pws->d[1],  pwt->d[1]);
3363}
3364
3365
/*
 * Multiply for one vector element: arg1 * arg2.
 *
 * The product is formed in uint64_t so that a result that does not fit in
 * 64 bits wraps modulo 2^64 instead of overflowing a signed type.  The
 * low bits, which are all a caller keeps when storing into a narrower
 * element, are the same either way.  df is unused.
 */
static inline int64_t msa_mulv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    return (int64_t)((uint64_t)arg1 * (uint64_t)arg2);
}
3370
3371void helper_msa_mulv_b(CPUMIPSState *env,
3372                       uint32_t wd, uint32_t ws, uint32_t wt)
3373{
3374    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3375    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3376    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3377
3378    pwd->b[0]  = msa_mulv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3379    pwd->b[1]  = msa_mulv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3380    pwd->b[2]  = msa_mulv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3381    pwd->b[3]  = msa_mulv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3382    pwd->b[4]  = msa_mulv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3383    pwd->b[5]  = msa_mulv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3384    pwd->b[6]  = msa_mulv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3385    pwd->b[7]  = msa_mulv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3386    pwd->b[8]  = msa_mulv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3387    pwd->b[9]  = msa_mulv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3388    pwd->b[10] = msa_mulv_df(DF_BYTE, pws->b[10], pwt->b[10]);
3389    pwd->b[11] = msa_mulv_df(DF_BYTE, pws->b[11], pwt->b[11]);
3390    pwd->b[12] = msa_mulv_df(DF_BYTE, pws->b[12], pwt->b[12]);
3391    pwd->b[13] = msa_mulv_df(DF_BYTE, pws->b[13], pwt->b[13]);
3392    pwd->b[14] = msa_mulv_df(DF_BYTE, pws->b[14], pwt->b[14]);
3393    pwd->b[15] = msa_mulv_df(DF_BYTE, pws->b[15], pwt->b[15]);
3394}
3395
3396void helper_msa_mulv_h(CPUMIPSState *env,
3397                       uint32_t wd, uint32_t ws, uint32_t wt)
3398{
3399    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3400    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3401    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3402
3403    pwd->h[0]  = msa_mulv_df(DF_HALF, pws->h[0],  pwt->h[0]);
3404    pwd->h[1]  = msa_mulv_df(DF_HALF, pws->h[1],  pwt->h[1]);
3405    pwd->h[2]  = msa_mulv_df(DF_HALF, pws->h[2],  pwt->h[2]);
3406    pwd->h[3]  = msa_mulv_df(DF_HALF, pws->h[3],  pwt->h[3]);
3407    pwd->h[4]  = msa_mulv_df(DF_HALF, pws->h[4],  pwt->h[4]);
3408    pwd->h[5]  = msa_mulv_df(DF_HALF, pws->h[5],  pwt->h[5]);
3409    pwd->h[6]  = msa_mulv_df(DF_HALF, pws->h[6],  pwt->h[6]);
3410    pwd->h[7]  = msa_mulv_df(DF_HALF, pws->h[7],  pwt->h[7]);
3411}
3412
3413void helper_msa_mulv_w(CPUMIPSState *env,
3414                       uint32_t wd, uint32_t ws, uint32_t wt)
3415{
3416    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3417    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3418    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3419
3420    pwd->w[0]  = msa_mulv_df(DF_WORD, pws->w[0],  pwt->w[0]);
3421    pwd->w[1]  = msa_mulv_df(DF_WORD, pws->w[1],  pwt->w[1]);
3422    pwd->w[2]  = msa_mulv_df(DF_WORD, pws->w[2],  pwt->w[2]);
3423    pwd->w[3]  = msa_mulv_df(DF_WORD, pws->w[3],  pwt->w[3]);
3424}
3425
3426void helper_msa_mulv_d(CPUMIPSState *env,
3427                       uint32_t wd, uint32_t ws, uint32_t wt)
3428{
3429    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3430    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3431    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3432
3433    pwd->d[0]  = msa_mulv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3434    pwd->d[1]  = msa_mulv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3435}
3436
3437
3438/*
3439 * Int Subtract
3440 * ------------
3441 *
3442 * +---------------+----------------------------------------------------------+
3443 * | ASUB_S.B      | Vector Absolute Values of Signed Subtract (byte)         |
3444 * | ASUB_S.H      | Vector Absolute Values of Signed Subtract (halfword)     |
3445 * | ASUB_S.W      | Vector Absolute Values of Signed Subtract (word)         |
3446 * | ASUB_S.D      | Vector Absolute Values of Signed Subtract (doubleword)   |
3447 * | ASUB_U.B      | Vector Absolute Values of Unsigned Subtract (byte)       |
3448 * | ASUB_U.H      | Vector Absolute Values of Unsigned Subtract (halfword)   |
3449 * | ASUB_U.W      | Vector Absolute Values of Unsigned Subtract (word)       |
3450 * | ASUB_U.D      | Vector Absolute Values of Unsigned Subtract (doubleword) |
3451 * | HSUB_S.H      | Vector Signed Horizontal Subtract (halfword)             |
3452 * | HSUB_S.W      | Vector Signed Horizontal Subtract (word)                 |
3453 * | HSUB_S.D      | Vector Signed Horizontal Subtract (doubleword)           |
 * | HSUB_U.H      | Vector Unsigned Horizontal Subtract (halfword)           |
 * | HSUB_U.W      | Vector Unsigned Horizontal Subtract (word)               |
 * | HSUB_U.D      | Vector Unsigned Horizontal Subtract (doubleword)         |
3457 * | SUBS_S.B      | Vector Signed Saturated Subtract (of Signed) (byte)      |
3458 * | SUBS_S.H      | Vector Signed Saturated Subtract (of Signed) (halfword)  |
3459 * | SUBS_S.W      | Vector Signed Saturated Subtract (of Signed) (word)      |
3460 * | SUBS_S.D      | Vector Signed Saturated Subtract (of Signed) (doubleword)|
3461 * | SUBS_U.B      | Vector Unsigned Saturated Subtract (of Uns.) (byte)      |
3462 * | SUBS_U.H      | Vector Unsigned Saturated Subtract (of Uns.) (halfword)  |
3463 * | SUBS_U.W      | Vector Unsigned Saturated Subtract (of Uns.) (word)      |
3464 * | SUBS_U.D      | Vector Unsigned Saturated Subtract (of Uns.) (doubleword)|
3465 * | SUBSUS_U.B    | Vector Uns. Sat. Subtract (of S. from Uns.) (byte)       |
3466 * | SUBSUS_U.H    | Vector Uns. Sat. Subtract (of S. from Uns.) (halfword)   |
3467 * | SUBSUS_U.W    | Vector Uns. Sat. Subtract (of S. from Uns.) (word)       |
3468 * | SUBSUS_U.D    | Vector Uns. Sat. Subtract (of S. from Uns.) (doubleword) |
3469 * | SUBSUU_S.B    | Vector Signed Saturated Subtract (of Uns.) (byte)        |
3470 * | SUBSUU_S.H    | Vector Signed Saturated Subtract (of Uns.) (halfword)    |
3471 * | SUBSUU_S.W    | Vector Signed Saturated Subtract (of Uns.) (word)        |
3472 * | SUBSUU_S.D    | Vector Signed Saturated Subtract (of Uns.) (doubleword)  |
3473 * | SUBV.B        | Vector Subtract (byte)                                   |
3474 * | SUBV.H        | Vector Subtract (halfword)                               |
3475 * | SUBV.W        | Vector Subtract (word)                                   |
3476 * | SUBV.D        | Vector Subtract (doubleword)                             |
3477 * +---------------+----------------------------------------------------------+
3478 */
3479
3480
/*
 * Absolute value of the signed difference between arg1 and arg2.
 *
 * df is not used by this function.
 *
 * The operands are compared as signed values, but the subtraction itself is
 * done on uint64_t: the distance between two int64_t values may not fit in
 * int64_t (e.g. INT64_MIN vs. INT64_MAX), and unsigned arithmetic wraps
 * modulo 2^64 instead of overflowing a signed type.  The 64-bit pattern of
 * the result is returned as int64_t.
 */
static inline int64_t msa_asub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    /* signed compare: always subtract the smaller from the larger */
    return (arg1 < arg2) ?
        (int64_t)((uint64_t)arg2 - (uint64_t)arg1) :
        (int64_t)((uint64_t)arg1 - (uint64_t)arg2);
}
3487
3488void helper_msa_asub_s_b(CPUMIPSState *env,
3489                         uint32_t wd, uint32_t ws, uint32_t wt)
3490{
3491    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3492    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3493    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3494
3495    pwd->b[0]  = msa_asub_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3496    pwd->b[1]  = msa_asub_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3497    pwd->b[2]  = msa_asub_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3498    pwd->b[3]  = msa_asub_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3499    pwd->b[4]  = msa_asub_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3500    pwd->b[5]  = msa_asub_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3501    pwd->b[6]  = msa_asub_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3502    pwd->b[7]  = msa_asub_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3503    pwd->b[8]  = msa_asub_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3504    pwd->b[9]  = msa_asub_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3505    pwd->b[10] = msa_asub_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3506    pwd->b[11] = msa_asub_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3507    pwd->b[12] = msa_asub_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3508    pwd->b[13] = msa_asub_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3509    pwd->b[14] = msa_asub_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3510    pwd->b[15] = msa_asub_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3511}
3512
3513void helper_msa_asub_s_h(CPUMIPSState *env,
3514                         uint32_t wd, uint32_t ws, uint32_t wt)
3515{
3516    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3517    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3518    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3519
3520    pwd->h[0]  = msa_asub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3521    pwd->h[1]  = msa_asub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3522    pwd->h[2]  = msa_asub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3523    pwd->h[3]  = msa_asub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3524    pwd->h[4]  = msa_asub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3525    pwd->h[5]  = msa_asub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3526    pwd->h[6]  = msa_asub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3527    pwd->h[7]  = msa_asub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3528}
3529
3530void helper_msa_asub_s_w(CPUMIPSState *env,
3531                         uint32_t wd, uint32_t ws, uint32_t wt)
3532{
3533    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3534    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3535    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3536
3537    pwd->w[0]  = msa_asub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3538    pwd->w[1]  = msa_asub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3539    pwd->w[2]  = msa_asub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3540    pwd->w[3]  = msa_asub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3541}
3542
3543void helper_msa_asub_s_d(CPUMIPSState *env,
3544                         uint32_t wd, uint32_t ws, uint32_t wt)
3545{
3546    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3547    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3548    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3549
3550    pwd->d[0]  = msa_asub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3551    pwd->d[1]  = msa_asub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3552}
3553
3554
/*
 * Absolute value of the unsigned difference between arg1 and arg2.
 *
 * Both operands are first reduced with UNSIGNED(x, df), i.e. masked with
 * DF_MAX_UINT(df), so only the low bits belonging to the data format take
 * part in the comparison and the subtraction.
 */
static inline uint64_t msa_asub_u_df(uint32_t df, uint64_t arg1, uint64_t arg2)
{
    const uint64_t lhs = UNSIGNED(arg1, df);
    const uint64_t rhs = UNSIGNED(arg2, df);

    /* unsigned compare: always subtract the smaller from the larger */
    if (lhs < rhs) {
        return rhs - lhs;
    }
    return lhs - rhs;
}
3563
3564void helper_msa_asub_u_b(CPUMIPSState *env,
3565                         uint32_t wd, uint32_t ws, uint32_t wt)
3566{
3567    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3568    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3569    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3570
3571    pwd->b[0]  = msa_asub_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3572    pwd->b[1]  = msa_asub_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3573    pwd->b[2]  = msa_asub_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3574    pwd->b[3]  = msa_asub_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3575    pwd->b[4]  = msa_asub_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3576    pwd->b[5]  = msa_asub_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3577    pwd->b[6]  = msa_asub_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3578    pwd->b[7]  = msa_asub_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3579    pwd->b[8]  = msa_asub_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3580    pwd->b[9]  = msa_asub_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3581    pwd->b[10] = msa_asub_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3582    pwd->b[11] = msa_asub_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3583    pwd->b[12] = msa_asub_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3584    pwd->b[13] = msa_asub_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3585    pwd->b[14] = msa_asub_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3586    pwd->b[15] = msa_asub_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3587}
3588
3589void helper_msa_asub_u_h(CPUMIPSState *env,
3590                         uint32_t wd, uint32_t ws, uint32_t wt)
3591{
3592    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3593    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3594    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3595
3596    pwd->h[0]  = msa_asub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3597    pwd->h[1]  = msa_asub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3598    pwd->h[2]  = msa_asub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3599    pwd->h[3]  = msa_asub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3600    pwd->h[4]  = msa_asub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3601    pwd->h[5]  = msa_asub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3602    pwd->h[6]  = msa_asub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3603    pwd->h[7]  = msa_asub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3604}
3605
3606void helper_msa_asub_u_w(CPUMIPSState *env,
3607                         uint32_t wd, uint32_t ws, uint32_t wt)
3608{
3609    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3610    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3611    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3612
3613    pwd->w[0]  = msa_asub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3614    pwd->w[1]  = msa_asub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3615    pwd->w[2]  = msa_asub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3616    pwd->w[3]  = msa_asub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3617}
3618
3619void helper_msa_asub_u_d(CPUMIPSState *env,
3620                         uint32_t wd, uint32_t ws, uint32_t wt)
3621{
3622    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3623    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3624    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3625
3626    pwd->d[0]  = msa_asub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3627    pwd->d[1]  = msa_asub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3628}
3629
3630
/*
 * Returns SIGNED_ODD(arg1, df) minus SIGNED_EVEN(arg2, df).
 *
 * NOTE(review): both macros are defined elsewhere in this file; presumably
 * they extract the sign-extended odd and even half-width parts of an
 * element -- confirm against the macro definitions.
 */
static inline int64_t msa_hsub_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t diff = SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df);

    return diff;
}
3635
3636void helper_msa_hsub_s_h(CPUMIPSState *env,
3637                         uint32_t wd, uint32_t ws, uint32_t wt)
3638{
3639    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3640    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3641    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3642
3643    pwd->h[0]  = msa_hsub_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3644    pwd->h[1]  = msa_hsub_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3645    pwd->h[2]  = msa_hsub_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3646    pwd->h[3]  = msa_hsub_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3647    pwd->h[4]  = msa_hsub_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3648    pwd->h[5]  = msa_hsub_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3649    pwd->h[6]  = msa_hsub_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3650    pwd->h[7]  = msa_hsub_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3651}
3652
3653void helper_msa_hsub_s_w(CPUMIPSState *env,
3654                         uint32_t wd, uint32_t ws, uint32_t wt)
3655{
3656    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3657    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3658    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3659
3660    pwd->w[0]  = msa_hsub_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3661    pwd->w[1]  = msa_hsub_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3662    pwd->w[2]  = msa_hsub_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3663    pwd->w[3]  = msa_hsub_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3664}
3665
3666void helper_msa_hsub_s_d(CPUMIPSState *env,
3667                         uint32_t wd, uint32_t ws, uint32_t wt)
3668{
3669    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3670    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3671    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3672
3673    pwd->d[0]  = msa_hsub_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3674    pwd->d[1]  = msa_hsub_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3675}
3676
3677
/*
 * Returns UNSIGNED_ODD(arg1, df) minus UNSIGNED_EVEN(arg2, df), converted
 * to int64_t.
 *
 * NOTE(review): both macros are defined elsewhere in this file; presumably
 * they extract the zero-extended odd and even half-width parts of an
 * element -- confirm against the macro definitions.
 */
static inline int64_t msa_hsub_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t diff = UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df);

    return diff;
}
3682
3683void helper_msa_hsub_u_h(CPUMIPSState *env,
3684                         uint32_t wd, uint32_t ws, uint32_t wt)
3685{
3686    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3687    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3688    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3689
3690    pwd->h[0]  = msa_hsub_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3691    pwd->h[1]  = msa_hsub_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3692    pwd->h[2]  = msa_hsub_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3693    pwd->h[3]  = msa_hsub_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3694    pwd->h[4]  = msa_hsub_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3695    pwd->h[5]  = msa_hsub_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3696    pwd->h[6]  = msa_hsub_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3697    pwd->h[7]  = msa_hsub_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3698}
3699
3700void helper_msa_hsub_u_w(CPUMIPSState *env,
3701                         uint32_t wd, uint32_t ws, uint32_t wt)
3702{
3703    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3704    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3705    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3706
3707    pwd->w[0]  = msa_hsub_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3708    pwd->w[1]  = msa_hsub_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3709    pwd->w[2]  = msa_hsub_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3710    pwd->w[3]  = msa_hsub_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3711}
3712
3713void helper_msa_hsub_u_d(CPUMIPSState *env,
3714                         uint32_t wd, uint32_t ws, uint32_t wt)
3715{
3716    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3717    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3718    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3719
3720    pwd->d[0]  = msa_hsub_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3721    pwd->d[1]  = msa_hsub_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3722}
3723
3724
/*
 * Signed saturating subtraction: returns arg1 - arg2, clamped to the range
 * [DF_MIN_INT(df), DF_MAX_INT(df)].
 *
 * The range checks are arranged so that arg1 - arg2 is only evaluated when
 * the result is known to lie inside that range.
 */
static inline int64_t msa_subs_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t hi = DF_MAX_INT(df);
    const int64_t lo = DF_MIN_INT(df);

    if (arg2 > 0) {
        /* subtracting a positive value can only fall below the minimum */
        if (lo + arg2 < arg1) {
            return arg1 - arg2;
        }
        return lo;
    }

    /* subtracting zero or a negative value can only exceed the maximum */
    if (arg1 < hi + arg2) {
        return arg1 - arg2;
    }
    return hi;
}
3735
3736void helper_msa_subs_s_b(CPUMIPSState *env,
3737                         uint32_t wd, uint32_t ws, uint32_t wt)
3738{
3739    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3740    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3741    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3742
3743    pwd->b[0]  = msa_subs_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3744    pwd->b[1]  = msa_subs_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3745    pwd->b[2]  = msa_subs_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3746    pwd->b[3]  = msa_subs_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3747    pwd->b[4]  = msa_subs_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3748    pwd->b[5]  = msa_subs_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3749    pwd->b[6]  = msa_subs_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3750    pwd->b[7]  = msa_subs_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3751    pwd->b[8]  = msa_subs_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3752    pwd->b[9]  = msa_subs_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3753    pwd->b[10] = msa_subs_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3754    pwd->b[11] = msa_subs_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3755    pwd->b[12] = msa_subs_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3756    pwd->b[13] = msa_subs_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3757    pwd->b[14] = msa_subs_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
3758    pwd->b[15] = msa_subs_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
3759}
3760
3761void helper_msa_subs_s_h(CPUMIPSState *env,
3762                         uint32_t wd, uint32_t ws, uint32_t wt)
3763{
3764    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3765    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3766    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3767
3768    pwd->h[0]  = msa_subs_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
3769    pwd->h[1]  = msa_subs_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
3770    pwd->h[2]  = msa_subs_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
3771    pwd->h[3]  = msa_subs_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
3772    pwd->h[4]  = msa_subs_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
3773    pwd->h[5]  = msa_subs_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
3774    pwd->h[6]  = msa_subs_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
3775    pwd->h[7]  = msa_subs_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
3776}
3777
3778void helper_msa_subs_s_w(CPUMIPSState *env,
3779                         uint32_t wd, uint32_t ws, uint32_t wt)
3780{
3781    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3782    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3783    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3784
3785    pwd->w[0]  = msa_subs_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
3786    pwd->w[1]  = msa_subs_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
3787    pwd->w[2]  = msa_subs_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
3788    pwd->w[3]  = msa_subs_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
3789}
3790
3791void helper_msa_subs_s_d(CPUMIPSState *env,
3792                         uint32_t wd, uint32_t ws, uint32_t wt)
3793{
3794    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3795    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3796    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3797
3798    pwd->d[0]  = msa_subs_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3799    pwd->d[1]  = msa_subs_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3800}
3801
3802
/*
 * Unsigned saturating subtraction: both operands are masked to the data
 * format width with UNSIGNED(); the result is their difference when the
 * first is larger than the second, and 0 otherwise.
 */
static inline int64_t msa_subs_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t minuend = UNSIGNED(arg1, df);
    const uint64_t subtrahend = UNSIGNED(arg2, df);

    if (minuend > subtrahend) {
        return minuend - subtrahend;
    }
    /* the difference would be zero or negative: clamp to zero */
    return 0;
}
3809
3810void helper_msa_subs_u_b(CPUMIPSState *env,
3811                         uint32_t wd, uint32_t ws, uint32_t wt)
3812{
3813    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3814    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3815    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3816
3817    pwd->b[0]  = msa_subs_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3818    pwd->b[1]  = msa_subs_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3819    pwd->b[2]  = msa_subs_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3820    pwd->b[3]  = msa_subs_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3821    pwd->b[4]  = msa_subs_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3822    pwd->b[5]  = msa_subs_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3823    pwd->b[6]  = msa_subs_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3824    pwd->b[7]  = msa_subs_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3825    pwd->b[8]  = msa_subs_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3826    pwd->b[9]  = msa_subs_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3827    pwd->b[10] = msa_subs_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3828    pwd->b[11] = msa_subs_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3829    pwd->b[12] = msa_subs_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3830    pwd->b[13] = msa_subs_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3831    pwd->b[14] = msa_subs_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3832    pwd->b[15] = msa_subs_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3833}
3834
3835void helper_msa_subs_u_h(CPUMIPSState *env,
3836                         uint32_t wd, uint32_t ws, uint32_t wt)
3837{
3838    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3839    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3840    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3841
3842    pwd->h[0]  = msa_subs_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3843    pwd->h[1]  = msa_subs_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3844    pwd->h[2]  = msa_subs_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3845    pwd->h[3]  = msa_subs_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3846    pwd->h[4]  = msa_subs_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3847    pwd->h[5]  = msa_subs_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3848    pwd->h[6]  = msa_subs_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3849    pwd->h[7]  = msa_subs_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3850}
3851
3852void helper_msa_subs_u_w(CPUMIPSState *env,
3853                         uint32_t wd, uint32_t ws, uint32_t wt)
3854{
3855    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3856    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3857    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3858
3859    pwd->w[0]  = msa_subs_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3860    pwd->w[1]  = msa_subs_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3861    pwd->w[2]  = msa_subs_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3862    pwd->w[3]  = msa_subs_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3863}
3864
3865void helper_msa_subs_u_d(CPUMIPSState *env,
3866                         uint32_t wd, uint32_t ws, uint32_t wt)
3867{
3868    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3869    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3870    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3871
3872    pwd->d[0]  = msa_subs_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3873    pwd->d[1]  = msa_subs_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3874}
3875
3876
/*
 * Unsigned saturating subtraction of a signed value from an unsigned one.
 *
 * arg1 is treated as unsigned (masked to the data format width with
 * UNSIGNED()); arg2 is used as a signed value.  The result is arg1 - arg2
 * clamped to the range [0, DF_MAX_UINT(df)].
 */
static inline int64_t msa_subsus_u_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = UNSIGNED(arg1, df);
    uint64_t max_uint = DF_MAX_UINT(df);
    if (arg2 >= 0) {
        /* subtracting a non-negative value: clamp at zero */
        uint64_t u_arg2 = (uint64_t)arg2;
        return (u_arg1 > u_arg2) ?
            (int64_t)(u_arg1 - u_arg2) :
            0;
    } else {
        /*
         * Subtracting a negative value adds its magnitude.  The magnitude
         * is computed in unsigned arithmetic because negating the most
         * negative int64_t as a signed value would overflow.
         */
        uint64_t u_arg2 = (uint64_t)0 - (uint64_t)arg2;
        return (u_arg1 < max_uint - u_arg2) ?
            (int64_t)(u_arg1 + u_arg2) :
            (int64_t)max_uint;
    }
}
3893
3894void helper_msa_subsus_u_b(CPUMIPSState *env,
3895                           uint32_t wd, uint32_t ws, uint32_t wt)
3896{
3897    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3898    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3899    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3900
3901    pwd->b[0]  = msa_subsus_u_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3902    pwd->b[1]  = msa_subsus_u_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3903    pwd->b[2]  = msa_subsus_u_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3904    pwd->b[3]  = msa_subsus_u_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3905    pwd->b[4]  = msa_subsus_u_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3906    pwd->b[5]  = msa_subsus_u_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3907    pwd->b[6]  = msa_subsus_u_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3908    pwd->b[7]  = msa_subsus_u_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3909    pwd->b[8]  = msa_subsus_u_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3910    pwd->b[9]  = msa_subsus_u_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3911    pwd->b[10] = msa_subsus_u_df(DF_BYTE, pws->b[10], pwt->b[10]);
3912    pwd->b[11] = msa_subsus_u_df(DF_BYTE, pws->b[11], pwt->b[11]);
3913    pwd->b[12] = msa_subsus_u_df(DF_BYTE, pws->b[12], pwt->b[12]);
3914    pwd->b[13] = msa_subsus_u_df(DF_BYTE, pws->b[13], pwt->b[13]);
3915    pwd->b[14] = msa_subsus_u_df(DF_BYTE, pws->b[14], pwt->b[14]);
3916    pwd->b[15] = msa_subsus_u_df(DF_BYTE, pws->b[15], pwt->b[15]);
3917}
3918
3919void helper_msa_subsus_u_h(CPUMIPSState *env,
3920                           uint32_t wd, uint32_t ws, uint32_t wt)
3921{
3922    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3923    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3924    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3925
3926    pwd->h[0]  = msa_subsus_u_df(DF_HALF, pws->h[0],  pwt->h[0]);
3927    pwd->h[1]  = msa_subsus_u_df(DF_HALF, pws->h[1],  pwt->h[1]);
3928    pwd->h[2]  = msa_subsus_u_df(DF_HALF, pws->h[2],  pwt->h[2]);
3929    pwd->h[3]  = msa_subsus_u_df(DF_HALF, pws->h[3],  pwt->h[3]);
3930    pwd->h[4]  = msa_subsus_u_df(DF_HALF, pws->h[4],  pwt->h[4]);
3931    pwd->h[5]  = msa_subsus_u_df(DF_HALF, pws->h[5],  pwt->h[5]);
3932    pwd->h[6]  = msa_subsus_u_df(DF_HALF, pws->h[6],  pwt->h[6]);
3933    pwd->h[7]  = msa_subsus_u_df(DF_HALF, pws->h[7],  pwt->h[7]);
3934}
3935
3936void helper_msa_subsus_u_w(CPUMIPSState *env,
3937                           uint32_t wd, uint32_t ws, uint32_t wt)
3938{
3939    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3940    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3941    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3942
3943    pwd->w[0]  = msa_subsus_u_df(DF_WORD, pws->w[0],  pwt->w[0]);
3944    pwd->w[1]  = msa_subsus_u_df(DF_WORD, pws->w[1],  pwt->w[1]);
3945    pwd->w[2]  = msa_subsus_u_df(DF_WORD, pws->w[2],  pwt->w[2]);
3946    pwd->w[3]  = msa_subsus_u_df(DF_WORD, pws->w[3],  pwt->w[3]);
3947}
3948
3949void helper_msa_subsus_u_d(CPUMIPSState *env,
3950                           uint32_t wd, uint32_t ws, uint32_t wt)
3951{
3952    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3953    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3954    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3955
3956    pwd->d[0]  = msa_subsus_u_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
3957    pwd->d[1]  = msa_subsus_u_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
3958}
3959
3960
/*
 * Signed saturating subtraction of two unsigned values.
 *
 * Both operands are masked to the data format width with UNSIGNED() and
 * treated as unsigned; the result is their difference clamped to the range
 * [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_subsuu_s_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    uint64_t u_arg1 = UNSIGNED(arg1, df);
    uint64_t u_arg2 = UNSIGNED(arg2, df);
    int64_t max_int = DF_MAX_INT(df);
    int64_t min_int = DF_MIN_INT(df);
    if (u_arg1 > u_arg2) {
        /* positive difference: clamp at max_int */
        return u_arg1 - u_arg2 < (uint64_t)max_int ?
            (int64_t)(u_arg1 - u_arg2) :
            max_int;
    } else {
        /*
         * Zero or negative difference: clamp at min_int.  The magnitude of
         * min_int is written as max_int + 1 in unsigned arithmetic, because
         * negating min_int as a signed value overflows for the 64-bit
         * format.
         */
        return u_arg2 - u_arg1 < (uint64_t)max_int + 1 ?
            (int64_t)(u_arg1 - u_arg2) :
            min_int;
    }
}
3977
3978void helper_msa_subsuu_s_b(CPUMIPSState *env,
3979                           uint32_t wd, uint32_t ws, uint32_t wt)
3980{
3981    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
3982    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
3983    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
3984
3985    pwd->b[0]  = msa_subsuu_s_df(DF_BYTE, pws->b[0],  pwt->b[0]);
3986    pwd->b[1]  = msa_subsuu_s_df(DF_BYTE, pws->b[1],  pwt->b[1]);
3987    pwd->b[2]  = msa_subsuu_s_df(DF_BYTE, pws->b[2],  pwt->b[2]);
3988    pwd->b[3]  = msa_subsuu_s_df(DF_BYTE, pws->b[3],  pwt->b[3]);
3989    pwd->b[4]  = msa_subsuu_s_df(DF_BYTE, pws->b[4],  pwt->b[4]);
3990    pwd->b[5]  = msa_subsuu_s_df(DF_BYTE, pws->b[5],  pwt->b[5]);
3991    pwd->b[6]  = msa_subsuu_s_df(DF_BYTE, pws->b[6],  pwt->b[6]);
3992    pwd->b[7]  = msa_subsuu_s_df(DF_BYTE, pws->b[7],  pwt->b[7]);
3993    pwd->b[8]  = msa_subsuu_s_df(DF_BYTE, pws->b[8],  pwt->b[8]);
3994    pwd->b[9]  = msa_subsuu_s_df(DF_BYTE, pws->b[9],  pwt->b[9]);
3995    pwd->b[10] = msa_subsuu_s_df(DF_BYTE, pws->b[10], pwt->b[10]);
3996    pwd->b[11] = msa_subsuu_s_df(DF_BYTE, pws->b[11], pwt->b[11]);
3997    pwd->b[12] = msa_subsuu_s_df(DF_BYTE, pws->b[12], pwt->b[12]);
3998    pwd->b[13] = msa_subsuu_s_df(DF_BYTE, pws->b[13], pwt->b[13]);
3999    pwd->b[14] = msa_subsuu_s_df(DF_BYTE, pws->b[14], pwt->b[14]);
4000    pwd->b[15] = msa_subsuu_s_df(DF_BYTE, pws->b[15], pwt->b[15]);
4001}
4002
4003void helper_msa_subsuu_s_h(CPUMIPSState *env,
4004                           uint32_t wd, uint32_t ws, uint32_t wt)
4005{
4006    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4007    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4008    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4009
4010    pwd->h[0]  = msa_subsuu_s_df(DF_HALF, pws->h[0],  pwt->h[0]);
4011    pwd->h[1]  = msa_subsuu_s_df(DF_HALF, pws->h[1],  pwt->h[1]);
4012    pwd->h[2]  = msa_subsuu_s_df(DF_HALF, pws->h[2],  pwt->h[2]);
4013    pwd->h[3]  = msa_subsuu_s_df(DF_HALF, pws->h[3],  pwt->h[3]);
4014    pwd->h[4]  = msa_subsuu_s_df(DF_HALF, pws->h[4],  pwt->h[4]);
4015    pwd->h[5]  = msa_subsuu_s_df(DF_HALF, pws->h[5],  pwt->h[5]);
4016    pwd->h[6]  = msa_subsuu_s_df(DF_HALF, pws->h[6],  pwt->h[6]);
4017    pwd->h[7]  = msa_subsuu_s_df(DF_HALF, pws->h[7],  pwt->h[7]);
4018}
4019
4020void helper_msa_subsuu_s_w(CPUMIPSState *env,
4021                           uint32_t wd, uint32_t ws, uint32_t wt)
4022{
4023    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4024    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4025    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4026
4027    pwd->w[0]  = msa_subsuu_s_df(DF_WORD, pws->w[0],  pwt->w[0]);
4028    pwd->w[1]  = msa_subsuu_s_df(DF_WORD, pws->w[1],  pwt->w[1]);
4029    pwd->w[2]  = msa_subsuu_s_df(DF_WORD, pws->w[2],  pwt->w[2]);
4030    pwd->w[3]  = msa_subsuu_s_df(DF_WORD, pws->w[3],  pwt->w[3]);
4031}
4032
4033void helper_msa_subsuu_s_d(CPUMIPSState *env,
4034                           uint32_t wd, uint32_t ws, uint32_t wt)
4035{
4036    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4037    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4038    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4039
4040    pwd->d[0]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4041    pwd->d[1]  = msa_subsuu_s_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4042}
4043
4044
/*
 * Plain (non-saturating) subtraction: returns arg1 - arg2 modulo 2^64.
 *
 * df is not used by this function.
 *
 * The subtraction is done on uint64_t so that a difference that does not
 * fit in int64_t (e.g. INT64_MIN - 1) wraps around instead of overflowing
 * a signed type.
 */
static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    return (int64_t)((uint64_t)arg1 - (uint64_t)arg2);
}
4049
4050void helper_msa_subv_b(CPUMIPSState *env,
4051                       uint32_t wd, uint32_t ws, uint32_t wt)
4052{
4053    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4054    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4055    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4056
4057    pwd->b[0]  = msa_subv_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4058    pwd->b[1]  = msa_subv_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4059    pwd->b[2]  = msa_subv_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4060    pwd->b[3]  = msa_subv_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4061    pwd->b[4]  = msa_subv_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4062    pwd->b[5]  = msa_subv_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4063    pwd->b[6]  = msa_subv_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4064    pwd->b[7]  = msa_subv_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4065    pwd->b[8]  = msa_subv_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4066    pwd->b[9]  = msa_subv_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4067    pwd->b[10] = msa_subv_df(DF_BYTE, pws->b[10], pwt->b[10]);
4068    pwd->b[11] = msa_subv_df(DF_BYTE, pws->b[11], pwt->b[11]);
4069    pwd->b[12] = msa_subv_df(DF_BYTE, pws->b[12], pwt->b[12]);
4070    pwd->b[13] = msa_subv_df(DF_BYTE, pws->b[13], pwt->b[13]);
4071    pwd->b[14] = msa_subv_df(DF_BYTE, pws->b[14], pwt->b[14]);
4072    pwd->b[15] = msa_subv_df(DF_BYTE, pws->b[15], pwt->b[15]);
4073}
4074
4075void helper_msa_subv_h(CPUMIPSState *env,
4076                       uint32_t wd, uint32_t ws, uint32_t wt)
4077{
4078    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4079    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4080    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4081
4082    pwd->h[0]  = msa_subv_df(DF_HALF, pws->h[0],  pwt->h[0]);
4083    pwd->h[1]  = msa_subv_df(DF_HALF, pws->h[1],  pwt->h[1]);
4084    pwd->h[2]  = msa_subv_df(DF_HALF, pws->h[2],  pwt->h[2]);
4085    pwd->h[3]  = msa_subv_df(DF_HALF, pws->h[3],  pwt->h[3]);
4086    pwd->h[4]  = msa_subv_df(DF_HALF, pws->h[4],  pwt->h[4]);
4087    pwd->h[5]  = msa_subv_df(DF_HALF, pws->h[5],  pwt->h[5]);
4088    pwd->h[6]  = msa_subv_df(DF_HALF, pws->h[6],  pwt->h[6]);
4089    pwd->h[7]  = msa_subv_df(DF_HALF, pws->h[7],  pwt->h[7]);
4090}
4091
4092void helper_msa_subv_w(CPUMIPSState *env,
4093                       uint32_t wd, uint32_t ws, uint32_t wt)
4094{
4095    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4096    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4097    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4098
4099    pwd->w[0]  = msa_subv_df(DF_WORD, pws->w[0],  pwt->w[0]);
4100    pwd->w[1]  = msa_subv_df(DF_WORD, pws->w[1],  pwt->w[1]);
4101    pwd->w[2]  = msa_subv_df(DF_WORD, pws->w[2],  pwt->w[2]);
4102    pwd->w[3]  = msa_subv_df(DF_WORD, pws->w[3],  pwt->w[3]);
4103}
4104
4105void helper_msa_subv_d(CPUMIPSState *env,
4106                       uint32_t wd, uint32_t ws, uint32_t wt)
4107{
4108    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4109    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4110    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4111
4112    pwd->d[0]  = msa_subv_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4113    pwd->d[1]  = msa_subv_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4114}
4115
4116
4117/*
4118 * Interleave
4119 * ----------
4120 *
4121 * +---------------+----------------------------------------------------------+
4122 * | ILVEV.B       | Vector Interleave Even (byte)                            |
4123 * | ILVEV.H       | Vector Interleave Even (halfword)                        |
4124 * | ILVEV.W       | Vector Interleave Even (word)                            |
4125 * | ILVEV.D       | Vector Interleave Even (doubleword)                      |
4126 * | ILVOD.B       | Vector Interleave Odd (byte)                             |
4127 * | ILVOD.H       | Vector Interleave Odd (halfword)                         |
4128 * | ILVOD.W       | Vector Interleave Odd (word)                             |
4129 * | ILVOD.D       | Vector Interleave Odd (doubleword)                       |
4130 * | ILVL.B        | Vector Interleave Left (byte)                            |
4131 * | ILVL.H        | Vector Interleave Left (halfword)                        |
4132 * | ILVL.W        | Vector Interleave Left (word)                            |
4133 * | ILVL.D        | Vector Interleave Left (doubleword)                      |
4134 * | ILVR.B        | Vector Interleave Right (byte)                           |
4135 * | ILVR.H        | Vector Interleave Right (halfword)                       |
4136 * | ILVR.W        | Vector Interleave Right (word)                           |
4137 * | ILVR.D        | Vector Interleave Right (doubleword)                     |
4138 * +---------------+----------------------------------------------------------+
4139 */
4140
4141
4142void helper_msa_ilvev_b(CPUMIPSState *env,
4143                        uint32_t wd, uint32_t ws, uint32_t wt)
4144{
4145    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4146    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4147    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4148
4149#if defined(HOST_WORDS_BIGENDIAN)
4150    pwd->b[8]  = pws->b[9];
4151    pwd->b[9]  = pwt->b[9];
4152    pwd->b[10] = pws->b[11];
4153    pwd->b[11] = pwt->b[11];
4154    pwd->b[12] = pws->b[13];
4155    pwd->b[13] = pwt->b[13];
4156    pwd->b[14] = pws->b[15];
4157    pwd->b[15] = pwt->b[15];
4158    pwd->b[0]  = pws->b[1];
4159    pwd->b[1]  = pwt->b[1];
4160    pwd->b[2]  = pws->b[3];
4161    pwd->b[3]  = pwt->b[3];
4162    pwd->b[4]  = pws->b[5];
4163    pwd->b[5]  = pwt->b[5];
4164    pwd->b[6]  = pws->b[7];
4165    pwd->b[7]  = pwt->b[7];
4166#else
4167    pwd->b[15] = pws->b[14];
4168    pwd->b[14] = pwt->b[14];
4169    pwd->b[13] = pws->b[12];
4170    pwd->b[12] = pwt->b[12];
4171    pwd->b[11] = pws->b[10];
4172    pwd->b[10] = pwt->b[10];
4173    pwd->b[9]  = pws->b[8];
4174    pwd->b[8]  = pwt->b[8];
4175    pwd->b[7]  = pws->b[6];
4176    pwd->b[6]  = pwt->b[6];
4177    pwd->b[5]  = pws->b[4];
4178    pwd->b[4]  = pwt->b[4];
4179    pwd->b[3]  = pws->b[2];
4180    pwd->b[2]  = pwt->b[2];
4181    pwd->b[1]  = pws->b[0];
4182    pwd->b[0]  = pwt->b[0];
4183#endif
4184}
4185
4186void helper_msa_ilvev_h(CPUMIPSState *env,
4187                        uint32_t wd, uint32_t ws, uint32_t wt)
4188{
4189    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4190    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4191    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4192
4193#if defined(HOST_WORDS_BIGENDIAN)
4194    pwd->h[4] = pws->h[5];
4195    pwd->h[5] = pwt->h[5];
4196    pwd->h[6] = pws->h[7];
4197    pwd->h[7] = pwt->h[7];
4198    pwd->h[0] = pws->h[1];
4199    pwd->h[1] = pwt->h[1];
4200    pwd->h[2] = pws->h[3];
4201    pwd->h[3] = pwt->h[3];
4202#else
4203    pwd->h[7] = pws->h[6];
4204    pwd->h[6] = pwt->h[6];
4205    pwd->h[5] = pws->h[4];
4206    pwd->h[4] = pwt->h[4];
4207    pwd->h[3] = pws->h[2];
4208    pwd->h[2] = pwt->h[2];
4209    pwd->h[1] = pws->h[0];
4210    pwd->h[0] = pwt->h[0];
4211#endif
4212}
4213
4214void helper_msa_ilvev_w(CPUMIPSState *env,
4215                        uint32_t wd, uint32_t ws, uint32_t wt)
4216{
4217    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4218    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4219    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4220
4221#if defined(HOST_WORDS_BIGENDIAN)
4222    pwd->w[2] = pws->w[3];
4223    pwd->w[3] = pwt->w[3];
4224    pwd->w[0] = pws->w[1];
4225    pwd->w[1] = pwt->w[1];
4226#else
4227    pwd->w[3] = pws->w[2];
4228    pwd->w[2] = pwt->w[2];
4229    pwd->w[1] = pws->w[0];
4230    pwd->w[0] = pwt->w[0];
4231#endif
4232}
4233
4234void helper_msa_ilvev_d(CPUMIPSState *env,
4235                        uint32_t wd, uint32_t ws, uint32_t wt)
4236{
4237    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4238    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4239    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4240
4241    pwd->d[1] = pws->d[0];
4242    pwd->d[0] = pwt->d[0];
4243}
4244
4245
4246void helper_msa_ilvod_b(CPUMIPSState *env,
4247                        uint32_t wd, uint32_t ws, uint32_t wt)
4248{
4249    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4250    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4251    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4252
4253#if defined(HOST_WORDS_BIGENDIAN)
4254    pwd->b[7]  = pwt->b[6];
4255    pwd->b[6]  = pws->b[6];
4256    pwd->b[5]  = pwt->b[4];
4257    pwd->b[4]  = pws->b[4];
4258    pwd->b[3]  = pwt->b[2];
4259    pwd->b[2]  = pws->b[2];
4260    pwd->b[1]  = pwt->b[0];
4261    pwd->b[0]  = pws->b[0];
4262    pwd->b[15] = pwt->b[14];
4263    pwd->b[14] = pws->b[14];
4264    pwd->b[13] = pwt->b[12];
4265    pwd->b[12] = pws->b[12];
4266    pwd->b[11] = pwt->b[10];
4267    pwd->b[10] = pws->b[10];
4268    pwd->b[9]  = pwt->b[8];
4269    pwd->b[8]  = pws->b[8];
4270#else
4271    pwd->b[0]  = pwt->b[1];
4272    pwd->b[1]  = pws->b[1];
4273    pwd->b[2]  = pwt->b[3];
4274    pwd->b[3]  = pws->b[3];
4275    pwd->b[4]  = pwt->b[5];
4276    pwd->b[5]  = pws->b[5];
4277    pwd->b[6]  = pwt->b[7];
4278    pwd->b[7]  = pws->b[7];
4279    pwd->b[8]  = pwt->b[9];
4280    pwd->b[9]  = pws->b[9];
4281    pwd->b[10] = pwt->b[11];
4282    pwd->b[11] = pws->b[11];
4283    pwd->b[12] = pwt->b[13];
4284    pwd->b[13] = pws->b[13];
4285    pwd->b[14] = pwt->b[15];
4286    pwd->b[15] = pws->b[15];
4287#endif
4288}
4289
4290void helper_msa_ilvod_h(CPUMIPSState *env,
4291                        uint32_t wd, uint32_t ws, uint32_t wt)
4292{
4293    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4294    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4295    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4296
4297#if defined(HOST_WORDS_BIGENDIAN)
4298    pwd->h[3] = pwt->h[2];
4299    pwd->h[2] = pws->h[2];
4300    pwd->h[1] = pwt->h[0];
4301    pwd->h[0] = pws->h[0];
4302    pwd->h[7] = pwt->h[6];
4303    pwd->h[6] = pws->h[6];
4304    pwd->h[5] = pwt->h[4];
4305    pwd->h[4] = pws->h[4];
4306#else
4307    pwd->h[0] = pwt->h[1];
4308    pwd->h[1] = pws->h[1];
4309    pwd->h[2] = pwt->h[3];
4310    pwd->h[3] = pws->h[3];
4311    pwd->h[4] = pwt->h[5];
4312    pwd->h[5] = pws->h[5];
4313    pwd->h[6] = pwt->h[7];
4314    pwd->h[7] = pws->h[7];
4315#endif
4316}
4317
4318void helper_msa_ilvod_w(CPUMIPSState *env,
4319                        uint32_t wd, uint32_t ws, uint32_t wt)
4320{
4321    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4322    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4323    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4324
4325#if defined(HOST_WORDS_BIGENDIAN)
4326    pwd->w[1] = pwt->w[0];
4327    pwd->w[0] = pws->w[0];
4328    pwd->w[3] = pwt->w[2];
4329    pwd->w[2] = pws->w[2];
4330#else
4331    pwd->w[0] = pwt->w[1];
4332    pwd->w[1] = pws->w[1];
4333    pwd->w[2] = pwt->w[3];
4334    pwd->w[3] = pws->w[3];
4335#endif
4336}
4337
4338void helper_msa_ilvod_d(CPUMIPSState *env,
4339                        uint32_t wd, uint32_t ws, uint32_t wt)
4340{
4341    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4342    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4343    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4344
4345    pwd->d[0] = pwt->d[1];
4346    pwd->d[1] = pws->d[1];
4347}
4348
4349
4350void helper_msa_ilvl_b(CPUMIPSState *env,
4351                       uint32_t wd, uint32_t ws, uint32_t wt)
4352{
4353    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4354    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4355    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4356
4357#if defined(HOST_WORDS_BIGENDIAN)
4358    pwd->b[7]  = pwt->b[15];
4359    pwd->b[6]  = pws->b[15];
4360    pwd->b[5]  = pwt->b[14];
4361    pwd->b[4]  = pws->b[14];
4362    pwd->b[3]  = pwt->b[13];
4363    pwd->b[2]  = pws->b[13];
4364    pwd->b[1]  = pwt->b[12];
4365    pwd->b[0]  = pws->b[12];
4366    pwd->b[15] = pwt->b[11];
4367    pwd->b[14] = pws->b[11];
4368    pwd->b[13] = pwt->b[10];
4369    pwd->b[12] = pws->b[10];
4370    pwd->b[11] = pwt->b[9];
4371    pwd->b[10] = pws->b[9];
4372    pwd->b[9]  = pwt->b[8];
4373    pwd->b[8]  = pws->b[8];
4374#else
4375    pwd->b[0]  = pwt->b[8];
4376    pwd->b[1]  = pws->b[8];
4377    pwd->b[2]  = pwt->b[9];
4378    pwd->b[3]  = pws->b[9];
4379    pwd->b[4]  = pwt->b[10];
4380    pwd->b[5]  = pws->b[10];
4381    pwd->b[6]  = pwt->b[11];
4382    pwd->b[7]  = pws->b[11];
4383    pwd->b[8]  = pwt->b[12];
4384    pwd->b[9]  = pws->b[12];
4385    pwd->b[10] = pwt->b[13];
4386    pwd->b[11] = pws->b[13];
4387    pwd->b[12] = pwt->b[14];
4388    pwd->b[13] = pws->b[14];
4389    pwd->b[14] = pwt->b[15];
4390    pwd->b[15] = pws->b[15];
4391#endif
4392}
4393
4394void helper_msa_ilvl_h(CPUMIPSState *env,
4395                       uint32_t wd, uint32_t ws, uint32_t wt)
4396{
4397    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4398    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4399    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4400
4401#if defined(HOST_WORDS_BIGENDIAN)
4402    pwd->h[3] = pwt->h[7];
4403    pwd->h[2] = pws->h[7];
4404    pwd->h[1] = pwt->h[6];
4405    pwd->h[0] = pws->h[6];
4406    pwd->h[7] = pwt->h[5];
4407    pwd->h[6] = pws->h[5];
4408    pwd->h[5] = pwt->h[4];
4409    pwd->h[4] = pws->h[4];
4410#else
4411    pwd->h[0] = pwt->h[4];
4412    pwd->h[1] = pws->h[4];
4413    pwd->h[2] = pwt->h[5];
4414    pwd->h[3] = pws->h[5];
4415    pwd->h[4] = pwt->h[6];
4416    pwd->h[5] = pws->h[6];
4417    pwd->h[6] = pwt->h[7];
4418    pwd->h[7] = pws->h[7];
4419#endif
4420}
4421
4422void helper_msa_ilvl_w(CPUMIPSState *env,
4423                       uint32_t wd, uint32_t ws, uint32_t wt)
4424{
4425    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4426    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4427    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4428
4429#if defined(HOST_WORDS_BIGENDIAN)
4430    pwd->w[1] = pwt->w[3];
4431    pwd->w[0] = pws->w[3];
4432    pwd->w[3] = pwt->w[2];
4433    pwd->w[2] = pws->w[2];
4434#else
4435    pwd->w[0] = pwt->w[2];
4436    pwd->w[1] = pws->w[2];
4437    pwd->w[2] = pwt->w[3];
4438    pwd->w[3] = pws->w[3];
4439#endif
4440}
4441
4442void helper_msa_ilvl_d(CPUMIPSState *env,
4443                       uint32_t wd, uint32_t ws, uint32_t wt)
4444{
4445    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4446    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4447    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4448
4449    pwd->d[0] = pwt->d[1];
4450    pwd->d[1] = pws->d[1];
4451}
4452
4453
4454void helper_msa_ilvr_b(CPUMIPSState *env,
4455                       uint32_t wd, uint32_t ws, uint32_t wt)
4456{
4457    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4458    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4459    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4460
4461#if defined(HOST_WORDS_BIGENDIAN)
4462    pwd->b[8]  = pws->b[0];
4463    pwd->b[9]  = pwt->b[0];
4464    pwd->b[10] = pws->b[1];
4465    pwd->b[11] = pwt->b[1];
4466    pwd->b[12] = pws->b[2];
4467    pwd->b[13] = pwt->b[2];
4468    pwd->b[14] = pws->b[3];
4469    pwd->b[15] = pwt->b[3];
4470    pwd->b[0]  = pws->b[4];
4471    pwd->b[1]  = pwt->b[4];
4472    pwd->b[2]  = pws->b[5];
4473    pwd->b[3]  = pwt->b[5];
4474    pwd->b[4]  = pws->b[6];
4475    pwd->b[5]  = pwt->b[6];
4476    pwd->b[6]  = pws->b[7];
4477    pwd->b[7]  = pwt->b[7];
4478#else
4479    pwd->b[15] = pws->b[7];
4480    pwd->b[14] = pwt->b[7];
4481    pwd->b[13] = pws->b[6];
4482    pwd->b[12] = pwt->b[6];
4483    pwd->b[11] = pws->b[5];
4484    pwd->b[10] = pwt->b[5];
4485    pwd->b[9]  = pws->b[4];
4486    pwd->b[8]  = pwt->b[4];
4487    pwd->b[7]  = pws->b[3];
4488    pwd->b[6]  = pwt->b[3];
4489    pwd->b[5]  = pws->b[2];
4490    pwd->b[4]  = pwt->b[2];
4491    pwd->b[3]  = pws->b[1];
4492    pwd->b[2]  = pwt->b[1];
4493    pwd->b[1]  = pws->b[0];
4494    pwd->b[0]  = pwt->b[0];
4495#endif
4496}
4497
4498void helper_msa_ilvr_h(CPUMIPSState *env,
4499                       uint32_t wd, uint32_t ws, uint32_t wt)
4500{
4501    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4502    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4503    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4504
4505#if defined(HOST_WORDS_BIGENDIAN)
4506    pwd->h[4] = pws->h[0];
4507    pwd->h[5] = pwt->h[0];
4508    pwd->h[6] = pws->h[1];
4509    pwd->h[7] = pwt->h[1];
4510    pwd->h[0] = pws->h[2];
4511    pwd->h[1] = pwt->h[2];
4512    pwd->h[2] = pws->h[3];
4513    pwd->h[3] = pwt->h[3];
4514#else
4515    pwd->h[7] = pws->h[3];
4516    pwd->h[6] = pwt->h[3];
4517    pwd->h[5] = pws->h[2];
4518    pwd->h[4] = pwt->h[2];
4519    pwd->h[3] = pws->h[1];
4520    pwd->h[2] = pwt->h[1];
4521    pwd->h[1] = pws->h[0];
4522    pwd->h[0] = pwt->h[0];
4523#endif
4524}
4525
4526void helper_msa_ilvr_w(CPUMIPSState *env,
4527                       uint32_t wd, uint32_t ws, uint32_t wt)
4528{
4529    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4530    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4531    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4532
4533#if defined(HOST_WORDS_BIGENDIAN)
4534    pwd->w[2] = pws->w[0];
4535    pwd->w[3] = pwt->w[0];
4536    pwd->w[0] = pws->w[1];
4537    pwd->w[1] = pwt->w[1];
4538#else
4539    pwd->w[3] = pws->w[1];
4540    pwd->w[2] = pwt->w[1];
4541    pwd->w[1] = pws->w[0];
4542    pwd->w[0] = pwt->w[0];
4543#endif
4544}
4545
4546void helper_msa_ilvr_d(CPUMIPSState *env,
4547                       uint32_t wd, uint32_t ws, uint32_t wt)
4548{
4549    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4550    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4551    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4552
4553    pwd->d[1] = pws->d[0];
4554    pwd->d[0] = pwt->d[0];
4555}
4556
4557
4558/*
4559 * Logic
4560 * -----
4561 *
4562 * +---------------+----------------------------------------------------------+
4563 * | AND.V         | Vector Logical And                                       |
4564 * | NOR.V         | Vector Logical Negated Or                                |
4565 * | OR.V          | Vector Logical Or                                        |
4566 * | XOR.V         | Vector Logical Exclusive Or                              |
4567 * +---------------+----------------------------------------------------------+
4568 */
4569
4570
4571void helper_msa_and_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4572{
4573    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4574    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4575    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4576
4577    pwd->d[0] = pws->d[0] & pwt->d[0];
4578    pwd->d[1] = pws->d[1] & pwt->d[1];
4579}
4580
4581void helper_msa_nor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4582{
4583    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4584    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4585    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4586
4587    pwd->d[0] = ~(pws->d[0] | pwt->d[0]);
4588    pwd->d[1] = ~(pws->d[1] | pwt->d[1]);
4589}
4590
4591void helper_msa_or_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4592{
4593    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4594    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4595    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4596
4597    pwd->d[0] = pws->d[0] | pwt->d[0];
4598    pwd->d[1] = pws->d[1] | pwt->d[1];
4599}
4600
4601void helper_msa_xor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws, uint32_t wt)
4602{
4603    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4604    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4605    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4606
4607    pwd->d[0] = pws->d[0] ^ pwt->d[0];
4608    pwd->d[1] = pws->d[1] ^ pwt->d[1];
4609}
4610
4611
4612/*
4613 * Move
4614 * ----
4615 *
4616 * +---------------+----------------------------------------------------------+
4617 * | MOVE.V        | Vector Move                                              |
4618 * +---------------+----------------------------------------------------------+
4619 */
4620
4621static inline void msa_move_v(wr_t *pwd, wr_t *pws)
4622{
4623    pwd->d[0] = pws->d[0];
4624    pwd->d[1] = pws->d[1];
4625}
4626
4627void helper_msa_move_v(CPUMIPSState *env, uint32_t wd, uint32_t ws)
4628{
4629    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4630    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4631
4632    msa_move_v(pwd, pws);
4633}
4634
4635
4636/*
4637 * Pack
4638 * ----
4639 *
4640 * +---------------+----------------------------------------------------------+
4641 * | PCKEV.B       | Vector Pack Even (byte)                                  |
4642 * | PCKEV.H       | Vector Pack Even (halfword)                              |
4643 * | PCKEV.W       | Vector Pack Even (word)                                  |
4644 * | PCKEV.D       | Vector Pack Even (doubleword)                            |
4645 * | PCKOD.B       | Vector Pack Odd (byte)                                   |
4646 * | PCKOD.H       | Vector Pack Odd (halfword)                               |
4647 * | PCKOD.W       | Vector Pack Odd (word)                                   |
4648 * | PCKOD.D       | Vector Pack Odd (doubleword)                             |
4649 * | VSHF.B        | Vector Data Preserving Shuffle (byte)                    |
4650 * | VSHF.H        | Vector Data Preserving Shuffle (halfword)                |
4651 * | VSHF.W        | Vector Data Preserving Shuffle (word)                    |
4652 * | VSHF.D        | Vector Data Preserving Shuffle (doubleword)              |
4653 * +---------------+----------------------------------------------------------+
4654 */
4655
4656
4657void helper_msa_pckev_b(CPUMIPSState *env,
4658                        uint32_t wd, uint32_t ws, uint32_t wt)
4659{
4660    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4661    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4662    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4663
4664#if defined(HOST_WORDS_BIGENDIAN)
4665    pwd->b[8]  = pws->b[9];
4666    pwd->b[10] = pws->b[13];
4667    pwd->b[12] = pws->b[1];
4668    pwd->b[14] = pws->b[5];
4669    pwd->b[0]  = pwt->b[9];
4670    pwd->b[2]  = pwt->b[13];
4671    pwd->b[4]  = pwt->b[1];
4672    pwd->b[6]  = pwt->b[5];
4673    pwd->b[9]  = pws->b[11];
4674    pwd->b[13] = pws->b[3];
4675    pwd->b[1]  = pwt->b[11];
4676    pwd->b[5]  = pwt->b[3];
4677    pwd->b[11] = pws->b[15];
4678    pwd->b[3]  = pwt->b[15];
4679    pwd->b[15] = pws->b[7];
4680    pwd->b[7]  = pwt->b[7];
4681#else
4682    pwd->b[15] = pws->b[14];
4683    pwd->b[13] = pws->b[10];
4684    pwd->b[11] = pws->b[6];
4685    pwd->b[9]  = pws->b[2];
4686    pwd->b[7]  = pwt->b[14];
4687    pwd->b[5]  = pwt->b[10];
4688    pwd->b[3]  = pwt->b[6];
4689    pwd->b[1]  = pwt->b[2];
4690    pwd->b[14] = pws->b[12];
4691    pwd->b[10] = pws->b[4];
4692    pwd->b[6]  = pwt->b[12];
4693    pwd->b[2]  = pwt->b[4];
4694    pwd->b[12] = pws->b[8];
4695    pwd->b[4]  = pwt->b[8];
4696    pwd->b[8]  = pws->b[0];
4697    pwd->b[0]  = pwt->b[0];
4698#endif
4699}
4700
4701void helper_msa_pckev_h(CPUMIPSState *env,
4702                        uint32_t wd, uint32_t ws, uint32_t wt)
4703{
4704    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4705    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4706    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4707
4708#if defined(HOST_WORDS_BIGENDIAN)
4709    pwd->h[4] = pws->h[5];
4710    pwd->h[6] = pws->h[1];
4711    pwd->h[0] = pwt->h[5];
4712    pwd->h[2] = pwt->h[1];
4713    pwd->h[5] = pws->h[7];
4714    pwd->h[1] = pwt->h[7];
4715    pwd->h[7] = pws->h[3];
4716    pwd->h[3] = pwt->h[3];
4717#else
4718    pwd->h[7] = pws->h[6];
4719    pwd->h[5] = pws->h[2];
4720    pwd->h[3] = pwt->h[6];
4721    pwd->h[1] = pwt->h[2];
4722    pwd->h[6] = pws->h[4];
4723    pwd->h[2] = pwt->h[4];
4724    pwd->h[4] = pws->h[0];
4725    pwd->h[0] = pwt->h[0];
4726#endif
4727}
4728
4729void helper_msa_pckev_w(CPUMIPSState *env,
4730                        uint32_t wd, uint32_t ws, uint32_t wt)
4731{
4732    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4733    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4734    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4735
4736#if defined(HOST_WORDS_BIGENDIAN)
4737    pwd->w[2] = pws->w[3];
4738    pwd->w[0] = pwt->w[3];
4739    pwd->w[3] = pws->w[1];
4740    pwd->w[1] = pwt->w[1];
4741#else
4742    pwd->w[3] = pws->w[2];
4743    pwd->w[1] = pwt->w[2];
4744    pwd->w[2] = pws->w[0];
4745    pwd->w[0] = pwt->w[0];
4746#endif
4747}
4748
4749void helper_msa_pckev_d(CPUMIPSState *env,
4750                        uint32_t wd, uint32_t ws, uint32_t wt)
4751{
4752    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4753    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4754    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4755
4756    pwd->d[1] = pws->d[0];
4757    pwd->d[0] = pwt->d[0];
4758}
4759
4760
4761void helper_msa_pckod_b(CPUMIPSState *env,
4762                        uint32_t wd, uint32_t ws, uint32_t wt)
4763{
4764    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4765    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4766    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4767
4768#if defined(HOST_WORDS_BIGENDIAN)
4769    pwd->b[7]  = pwt->b[6];
4770    pwd->b[5]  = pwt->b[2];
4771    pwd->b[3]  = pwt->b[14];
4772    pwd->b[1]  = pwt->b[10];
4773    pwd->b[15] = pws->b[6];
4774    pwd->b[13] = pws->b[2];
4775    pwd->b[11] = pws->b[14];
4776    pwd->b[9]  = pws->b[10];
4777    pwd->b[6]  = pwt->b[4];
4778    pwd->b[2]  = pwt->b[12];
4779    pwd->b[14] = pws->b[4];
4780    pwd->b[10] = pws->b[12];
4781    pwd->b[4]  = pwt->b[0];
4782    pwd->b[12] = pws->b[0];
4783    pwd->b[0]  = pwt->b[8];
4784    pwd->b[8]  = pws->b[8];
4785#else
4786    pwd->b[0]  = pwt->b[1];
4787    pwd->b[2]  = pwt->b[5];
4788    pwd->b[4]  = pwt->b[9];
4789    pwd->b[6]  = pwt->b[13];
4790    pwd->b[8]  = pws->b[1];
4791    pwd->b[10] = pws->b[5];
4792    pwd->b[12] = pws->b[9];
4793    pwd->b[14] = pws->b[13];
4794    pwd->b[1]  = pwt->b[3];
4795    pwd->b[5]  = pwt->b[11];
4796    pwd->b[9]  = pws->b[3];
4797    pwd->b[13] = pws->b[11];
4798    pwd->b[3]  = pwt->b[7];
4799    pwd->b[11] = pws->b[7];
4800    pwd->b[7]  = pwt->b[15];
4801    pwd->b[15] = pws->b[15];
4802#endif
4803
4804}
4805
4806void helper_msa_pckod_h(CPUMIPSState *env,
4807                        uint32_t wd, uint32_t ws, uint32_t wt)
4808{
4809    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4810    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4811    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4812
4813#if defined(HOST_WORDS_BIGENDIAN)
4814    pwd->h[3] = pwt->h[2];
4815    pwd->h[1] = pwt->h[6];
4816    pwd->h[7] = pws->h[2];
4817    pwd->h[5] = pws->h[6];
4818    pwd->h[2] = pwt->h[0];
4819    pwd->h[6] = pws->h[0];
4820    pwd->h[0] = pwt->h[4];
4821    pwd->h[4] = pws->h[4];
4822#else
4823    pwd->h[0] = pwt->h[1];
4824    pwd->h[2] = pwt->h[5];
4825    pwd->h[4] = pws->h[1];
4826    pwd->h[6] = pws->h[5];
4827    pwd->h[1] = pwt->h[3];
4828    pwd->h[5] = pws->h[3];
4829    pwd->h[3] = pwt->h[7];
4830    pwd->h[7] = pws->h[7];
4831#endif
4832}
4833
4834void helper_msa_pckod_w(CPUMIPSState *env,
4835                        uint32_t wd, uint32_t ws, uint32_t wt)
4836{
4837    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4838    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4839    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4840
4841#if defined(HOST_WORDS_BIGENDIAN)
4842    pwd->w[1] = pwt->w[0];
4843    pwd->w[3] = pws->w[0];
4844    pwd->w[0] = pwt->w[2];
4845    pwd->w[2] = pws->w[2];
4846#else
4847    pwd->w[0] = pwt->w[1];
4848    pwd->w[2] = pws->w[1];
4849    pwd->w[1] = pwt->w[3];
4850    pwd->w[3] = pws->w[3];
4851#endif
4852}
4853
4854void helper_msa_pckod_d(CPUMIPSState *env,
4855                        uint32_t wd, uint32_t ws, uint32_t wt)
4856{
4857    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4858    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4859    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4860
4861    pwd->d[0] = pwt->d[1];
4862    pwd->d[1] = pws->d[1];
4863}
4864
4865
4866/*
4867 * Shift
4868 * -----
4869 *
4870 * +---------------+----------------------------------------------------------+
4871 * | SLL.B         | Vector Shift Left (byte)                                 |
4872 * | SLL.H         | Vector Shift Left (halfword)                             |
4873 * | SLL.W         | Vector Shift Left (word)                                 |
4874 * | SLL.D         | Vector Shift Left (doubleword)                           |
4875 * | SRA.B         | Vector Shift Right Arithmetic (byte)                     |
4876 * | SRA.H         | Vector Shift Right Arithmetic (halfword)                 |
4877 * | SRA.W         | Vector Shift Right Arithmetic (word)                     |
4878 * | SRA.D         | Vector Shift Right Arithmetic (doubleword)               |
4879 * | SRAR.B        | Vector Shift Right Arithmetic Rounded (byte)             |
4880 * | SRAR.H        | Vector Shift Right Arithmetic Rounded (halfword)         |
4881 * | SRAR.W        | Vector Shift Right Arithmetic Rounded (word)             |
4882 * | SRAR.D        | Vector Shift Right Arithmetic Rounded (doubleword)       |
4883 * | SRL.B         | Vector Shift Right Logical (byte)                        |
4884 * | SRL.H         | Vector Shift Right Logical (halfword)                    |
4885 * | SRL.W         | Vector Shift Right Logical (word)                        |
4886 * | SRL.D         | Vector Shift Right Logical (doubleword)                  |
4887 * | SRLR.B        | Vector Shift Right Logical Rounded (byte)                |
4888 * | SRLR.H        | Vector Shift Right Logical Rounded (halfword)            |
4889 * | SRLR.W        | Vector Shift Right Logical Rounded (word)                |
4890 * | SRLR.D        | Vector Shift Right Logical Rounded (doubleword)          |
4891 * +---------------+----------------------------------------------------------+
4892 */
4893
4894
/*
 * Shift arg1 left by the bit count that BIT_POSITION() derives from arg2
 * for data format df.
 * NOTE(review): BIT_POSITION presumably reduces arg2 modulo the element
 * width - confirm against the macro definition.
 */
static inline int64_t msa_sll_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int32_t shift = BIT_POSITION(arg2, df);

    return arg1 << shift;
}
4900
4901void helper_msa_sll_b(CPUMIPSState *env,
4902                      uint32_t wd, uint32_t ws, uint32_t wt)
4903{
4904    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4905    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4906    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4907
4908    pwd->b[0]  = msa_sll_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4909    pwd->b[1]  = msa_sll_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4910    pwd->b[2]  = msa_sll_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4911    pwd->b[3]  = msa_sll_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4912    pwd->b[4]  = msa_sll_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4913    pwd->b[5]  = msa_sll_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4914    pwd->b[6]  = msa_sll_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4915    pwd->b[7]  = msa_sll_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4916    pwd->b[8]  = msa_sll_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4917    pwd->b[9]  = msa_sll_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4918    pwd->b[10] = msa_sll_df(DF_BYTE, pws->b[10], pwt->b[10]);
4919    pwd->b[11] = msa_sll_df(DF_BYTE, pws->b[11], pwt->b[11]);
4920    pwd->b[12] = msa_sll_df(DF_BYTE, pws->b[12], pwt->b[12]);
4921    pwd->b[13] = msa_sll_df(DF_BYTE, pws->b[13], pwt->b[13]);
4922    pwd->b[14] = msa_sll_df(DF_BYTE, pws->b[14], pwt->b[14]);
4923    pwd->b[15] = msa_sll_df(DF_BYTE, pws->b[15], pwt->b[15]);
4924}
4925
4926void helper_msa_sll_h(CPUMIPSState *env,
4927                      uint32_t wd, uint32_t ws, uint32_t wt)
4928{
4929    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4930    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4931    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4932
4933    pwd->h[0]  = msa_sll_df(DF_HALF, pws->h[0],  pwt->h[0]);
4934    pwd->h[1]  = msa_sll_df(DF_HALF, pws->h[1],  pwt->h[1]);
4935    pwd->h[2]  = msa_sll_df(DF_HALF, pws->h[2],  pwt->h[2]);
4936    pwd->h[3]  = msa_sll_df(DF_HALF, pws->h[3],  pwt->h[3]);
4937    pwd->h[4]  = msa_sll_df(DF_HALF, pws->h[4],  pwt->h[4]);
4938    pwd->h[5]  = msa_sll_df(DF_HALF, pws->h[5],  pwt->h[5]);
4939    pwd->h[6]  = msa_sll_df(DF_HALF, pws->h[6],  pwt->h[6]);
4940    pwd->h[7]  = msa_sll_df(DF_HALF, pws->h[7],  pwt->h[7]);
4941}
4942
4943void helper_msa_sll_w(CPUMIPSState *env,
4944                      uint32_t wd, uint32_t ws, uint32_t wt)
4945{
4946    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4947    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4948    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4949
4950    pwd->w[0]  = msa_sll_df(DF_WORD, pws->w[0],  pwt->w[0]);
4951    pwd->w[1]  = msa_sll_df(DF_WORD, pws->w[1],  pwt->w[1]);
4952    pwd->w[2]  = msa_sll_df(DF_WORD, pws->w[2],  pwt->w[2]);
4953    pwd->w[3]  = msa_sll_df(DF_WORD, pws->w[3],  pwt->w[3]);
4954}
4955
4956void helper_msa_sll_d(CPUMIPSState *env,
4957                      uint32_t wd, uint32_t ws, uint32_t wt)
4958{
4959    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4960    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4961    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4962
4963    pwd->d[0]  = msa_sll_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
4964    pwd->d[1]  = msa_sll_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
4965}
4966
4967
/*
 * Shift the signed value arg1 right by the bit count that BIT_POSITION()
 * derives from arg2 for data format df.
 * NOTE(review): BIT_POSITION presumably reduces arg2 modulo the element
 * width - confirm against the macro definition.
 */
static inline int64_t msa_sra_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int32_t shift = BIT_POSITION(arg2, df);

    return arg1 >> shift;
}
4973
4974void helper_msa_sra_b(CPUMIPSState *env,
4975                      uint32_t wd, uint32_t ws, uint32_t wt)
4976{
4977    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
4978    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
4979    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
4980
4981    pwd->b[0]  = msa_sra_df(DF_BYTE, pws->b[0],  pwt->b[0]);
4982    pwd->b[1]  = msa_sra_df(DF_BYTE, pws->b[1],  pwt->b[1]);
4983    pwd->b[2]  = msa_sra_df(DF_BYTE, pws->b[2],  pwt->b[2]);
4984    pwd->b[3]  = msa_sra_df(DF_BYTE, pws->b[3],  pwt->b[3]);
4985    pwd->b[4]  = msa_sra_df(DF_BYTE, pws->b[4],  pwt->b[4]);
4986    pwd->b[5]  = msa_sra_df(DF_BYTE, pws->b[5],  pwt->b[5]);
4987    pwd->b[6]  = msa_sra_df(DF_BYTE, pws->b[6],  pwt->b[6]);
4988    pwd->b[7]  = msa_sra_df(DF_BYTE, pws->b[7],  pwt->b[7]);
4989    pwd->b[8]  = msa_sra_df(DF_BYTE, pws->b[8],  pwt->b[8]);
4990    pwd->b[9]  = msa_sra_df(DF_BYTE, pws->b[9],  pwt->b[9]);
4991    pwd->b[10] = msa_sra_df(DF_BYTE, pws->b[10], pwt->b[10]);
4992    pwd->b[11] = msa_sra_df(DF_BYTE, pws->b[11], pwt->b[11]);
4993    pwd->b[12] = msa_sra_df(DF_BYTE, pws->b[12], pwt->b[12]);
4994    pwd->b[13] = msa_sra_df(DF_BYTE, pws->b[13], pwt->b[13]);
4995    pwd->b[14] = msa_sra_df(DF_BYTE, pws->b[14], pwt->b[14]);
4996    pwd->b[15] = msa_sra_df(DF_BYTE, pws->b[15], pwt->b[15]);
4997}
4998
4999void helper_msa_sra_h(CPUMIPSState *env,
5000                      uint32_t wd, uint32_t ws, uint32_t wt)
5001{
5002    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5003    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5004    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5005
5006    pwd->h[0]  = msa_sra_df(DF_HALF, pws->h[0],  pwt->h[0]);
5007    pwd->h[1]  = msa_sra_df(DF_HALF, pws->h[1],  pwt->h[1]);
5008    pwd->h[2]  = msa_sra_df(DF_HALF, pws->h[2],  pwt->h[2]);
5009    pwd->h[3]  = msa_sra_df(DF_HALF, pws->h[3],  pwt->h[3]);
5010    pwd->h[4]  = msa_sra_df(DF_HALF, pws->h[4],  pwt->h[4]);
5011    pwd->h[5]  = msa_sra_df(DF_HALF, pws->h[5],  pwt->h[5]);
5012    pwd->h[6]  = msa_sra_df(DF_HALF, pws->h[6],  pwt->h[6]);
5013    pwd->h[7]  = msa_sra_df(DF_HALF, pws->h[7],  pwt->h[7]);
5014}
5015
5016void helper_msa_sra_w(CPUMIPSState *env,
5017                      uint32_t wd, uint32_t ws, uint32_t wt)
5018{
5019    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5020    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5021    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5022
5023    pwd->w[0]  = msa_sra_df(DF_WORD, pws->w[0],  pwt->w[0]);
5024    pwd->w[1]  = msa_sra_df(DF_WORD, pws->w[1],  pwt->w[1]);
5025    pwd->w[2]  = msa_sra_df(DF_WORD, pws->w[2],  pwt->w[2]);
5026    pwd->w[3]  = msa_sra_df(DF_WORD, pws->w[3],  pwt->w[3]);
5027}
5028
5029void helper_msa_sra_d(CPUMIPSState *env,
5030                      uint32_t wd, uint32_t ws, uint32_t wt)
5031{
5032    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5033    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5034    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5035
5036    pwd->d[0]  = msa_sra_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5037    pwd->d[1]  = msa_sra_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5038}
5039
5040
/*
 * Right shift of the signed value arg1 with rounding.
 *
 * The shift count is BIT_POSITION(arg2, df) (macro defined elsewhere in this
 * file).  A count of zero returns arg1 unchanged; otherwise the most
 * significant bit that gets shifted out is added to the shifted result.
 */
static inline int64_t msa_srar_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int32_t shift = BIT_POSITION(arg2, df);
    int64_t round;

    if (shift == 0) {
        /* Nothing is shifted out, so there is nothing to round. */
        return arg1;
    }

    round = (arg1 >> (shift - 1)) & 1;
    return (arg1 >> shift) + round;
}
5051
5052void helper_msa_srar_b(CPUMIPSState *env,
5053                       uint32_t wd, uint32_t ws, uint32_t wt)
5054{
5055    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5056    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5057    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5058
5059    pwd->b[0]  = msa_srar_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5060    pwd->b[1]  = msa_srar_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5061    pwd->b[2]  = msa_srar_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5062    pwd->b[3]  = msa_srar_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5063    pwd->b[4]  = msa_srar_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5064    pwd->b[5]  = msa_srar_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5065    pwd->b[6]  = msa_srar_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5066    pwd->b[7]  = msa_srar_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5067    pwd->b[8]  = msa_srar_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5068    pwd->b[9]  = msa_srar_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5069    pwd->b[10] = msa_srar_df(DF_BYTE, pws->b[10], pwt->b[10]);
5070    pwd->b[11] = msa_srar_df(DF_BYTE, pws->b[11], pwt->b[11]);
5071    pwd->b[12] = msa_srar_df(DF_BYTE, pws->b[12], pwt->b[12]);
5072    pwd->b[13] = msa_srar_df(DF_BYTE, pws->b[13], pwt->b[13]);
5073    pwd->b[14] = msa_srar_df(DF_BYTE, pws->b[14], pwt->b[14]);
5074    pwd->b[15] = msa_srar_df(DF_BYTE, pws->b[15], pwt->b[15]);
5075}
5076
5077void helper_msa_srar_h(CPUMIPSState *env,
5078                       uint32_t wd, uint32_t ws, uint32_t wt)
5079{
5080    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5081    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5082    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5083
5084    pwd->h[0]  = msa_srar_df(DF_HALF, pws->h[0],  pwt->h[0]);
5085    pwd->h[1]  = msa_srar_df(DF_HALF, pws->h[1],  pwt->h[1]);
5086    pwd->h[2]  = msa_srar_df(DF_HALF, pws->h[2],  pwt->h[2]);
5087    pwd->h[3]  = msa_srar_df(DF_HALF, pws->h[3],  pwt->h[3]);
5088    pwd->h[4]  = msa_srar_df(DF_HALF, pws->h[4],  pwt->h[4]);
5089    pwd->h[5]  = msa_srar_df(DF_HALF, pws->h[5],  pwt->h[5]);
5090    pwd->h[6]  = msa_srar_df(DF_HALF, pws->h[6],  pwt->h[6]);
5091    pwd->h[7]  = msa_srar_df(DF_HALF, pws->h[7],  pwt->h[7]);
5092}
5093
5094void helper_msa_srar_w(CPUMIPSState *env,
5095                       uint32_t wd, uint32_t ws, uint32_t wt)
5096{
5097    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5098    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5099    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5100
5101    pwd->w[0]  = msa_srar_df(DF_WORD, pws->w[0],  pwt->w[0]);
5102    pwd->w[1]  = msa_srar_df(DF_WORD, pws->w[1],  pwt->w[1]);
5103    pwd->w[2]  = msa_srar_df(DF_WORD, pws->w[2],  pwt->w[2]);
5104    pwd->w[3]  = msa_srar_df(DF_WORD, pws->w[3],  pwt->w[3]);
5105}
5106
5107void helper_msa_srar_d(CPUMIPSState *env,
5108                       uint32_t wd, uint32_t ws, uint32_t wt)
5109{
5110    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5111    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5112    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5113
5114    pwd->d[0]  = msa_srar_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5115    pwd->d[1]  = msa_srar_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5116}
5117
5118
/*
 * Right shift of one element, performed on an unsigned value.
 *
 * arg1 is first masked to the element width with UNSIGNED(arg1, df), which
 * discards any sign-extension bits, and is then shifted right by
 * BIT_POSITION(arg2, df) (macro defined elsewhere in this file).
 */
static inline int64_t msa_srl_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t value = UNSIGNED(arg1, df);
    const int32_t shift = BIT_POSITION(arg2, df);

    return value >> shift;
}
5125
5126void helper_msa_srl_b(CPUMIPSState *env,
5127                      uint32_t wd, uint32_t ws, uint32_t wt)
5128{
5129    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5130    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5131    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5132
5133    pwd->b[0]  = msa_srl_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5134    pwd->b[1]  = msa_srl_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5135    pwd->b[2]  = msa_srl_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5136    pwd->b[3]  = msa_srl_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5137    pwd->b[4]  = msa_srl_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5138    pwd->b[5]  = msa_srl_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5139    pwd->b[6]  = msa_srl_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5140    pwd->b[7]  = msa_srl_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5141    pwd->b[8]  = msa_srl_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5142    pwd->b[9]  = msa_srl_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5143    pwd->b[10] = msa_srl_df(DF_BYTE, pws->b[10], pwt->b[10]);
5144    pwd->b[11] = msa_srl_df(DF_BYTE, pws->b[11], pwt->b[11]);
5145    pwd->b[12] = msa_srl_df(DF_BYTE, pws->b[12], pwt->b[12]);
5146    pwd->b[13] = msa_srl_df(DF_BYTE, pws->b[13], pwt->b[13]);
5147    pwd->b[14] = msa_srl_df(DF_BYTE, pws->b[14], pwt->b[14]);
5148    pwd->b[15] = msa_srl_df(DF_BYTE, pws->b[15], pwt->b[15]);
5149}
5150
5151void helper_msa_srl_h(CPUMIPSState *env,
5152                      uint32_t wd, uint32_t ws, uint32_t wt)
5153{
5154    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5155    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5156    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5157
5158    pwd->h[0]  = msa_srl_df(DF_HALF, pws->h[0],  pwt->h[0]);
5159    pwd->h[1]  = msa_srl_df(DF_HALF, pws->h[1],  pwt->h[1]);
5160    pwd->h[2]  = msa_srl_df(DF_HALF, pws->h[2],  pwt->h[2]);
5161    pwd->h[3]  = msa_srl_df(DF_HALF, pws->h[3],  pwt->h[3]);
5162    pwd->h[4]  = msa_srl_df(DF_HALF, pws->h[4],  pwt->h[4]);
5163    pwd->h[5]  = msa_srl_df(DF_HALF, pws->h[5],  pwt->h[5]);
5164    pwd->h[6]  = msa_srl_df(DF_HALF, pws->h[6],  pwt->h[6]);
5165    pwd->h[7]  = msa_srl_df(DF_HALF, pws->h[7],  pwt->h[7]);
5166}
5167
5168void helper_msa_srl_w(CPUMIPSState *env,
5169                      uint32_t wd, uint32_t ws, uint32_t wt)
5170{
5171    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5172    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5173    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5174
5175    pwd->w[0]  = msa_srl_df(DF_WORD, pws->w[0],  pwt->w[0]);
5176    pwd->w[1]  = msa_srl_df(DF_WORD, pws->w[1],  pwt->w[1]);
5177    pwd->w[2]  = msa_srl_df(DF_WORD, pws->w[2],  pwt->w[2]);
5178    pwd->w[3]  = msa_srl_df(DF_WORD, pws->w[3],  pwt->w[3]);
5179}
5180
5181void helper_msa_srl_d(CPUMIPSState *env,
5182                      uint32_t wd, uint32_t ws, uint32_t wt)
5183{
5184    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5185    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5186    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5187
5188    pwd->d[0]  = msa_srl_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5189    pwd->d[1]  = msa_srl_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5190}
5191
5192
/*
 * Unsigned right shift of one element with rounding.
 *
 * arg1 is masked to the element width with UNSIGNED(arg1, df) and shifted by
 * BIT_POSITION(arg2, df) (macro defined elsewhere in this file).  A count of
 * zero returns the masked value unchanged; otherwise the most significant
 * bit that gets shifted out is added to the shifted result.
 */
static inline int64_t msa_srlr_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const uint64_t value = UNSIGNED(arg1, df);
    const int32_t shift = BIT_POSITION(arg2, df);
    uint64_t round;

    if (shift == 0) {
        /* Nothing is shifted out, so there is nothing to round. */
        return value;
    }

    round = (value >> (shift - 1)) & 1;
    return (value >> shift) + round;
}
5204
5205void helper_msa_srlr_b(CPUMIPSState *env,
5206                       uint32_t wd, uint32_t ws, uint32_t wt)
5207{
5208    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5209    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5210    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5211
5212    pwd->b[0]  = msa_srlr_df(DF_BYTE, pws->b[0],  pwt->b[0]);
5213    pwd->b[1]  = msa_srlr_df(DF_BYTE, pws->b[1],  pwt->b[1]);
5214    pwd->b[2]  = msa_srlr_df(DF_BYTE, pws->b[2],  pwt->b[2]);
5215    pwd->b[3]  = msa_srlr_df(DF_BYTE, pws->b[3],  pwt->b[3]);
5216    pwd->b[4]  = msa_srlr_df(DF_BYTE, pws->b[4],  pwt->b[4]);
5217    pwd->b[5]  = msa_srlr_df(DF_BYTE, pws->b[5],  pwt->b[5]);
5218    pwd->b[6]  = msa_srlr_df(DF_BYTE, pws->b[6],  pwt->b[6]);
5219    pwd->b[7]  = msa_srlr_df(DF_BYTE, pws->b[7],  pwt->b[7]);
5220    pwd->b[8]  = msa_srlr_df(DF_BYTE, pws->b[8],  pwt->b[8]);
5221    pwd->b[9]  = msa_srlr_df(DF_BYTE, pws->b[9],  pwt->b[9]);
5222    pwd->b[10] = msa_srlr_df(DF_BYTE, pws->b[10], pwt->b[10]);
5223    pwd->b[11] = msa_srlr_df(DF_BYTE, pws->b[11], pwt->b[11]);
5224    pwd->b[12] = msa_srlr_df(DF_BYTE, pws->b[12], pwt->b[12]);
5225    pwd->b[13] = msa_srlr_df(DF_BYTE, pws->b[13], pwt->b[13]);
5226    pwd->b[14] = msa_srlr_df(DF_BYTE, pws->b[14], pwt->b[14]);
5227    pwd->b[15] = msa_srlr_df(DF_BYTE, pws->b[15], pwt->b[15]);
5228}
5229
5230void helper_msa_srlr_h(CPUMIPSState *env,
5231                       uint32_t wd, uint32_t ws, uint32_t wt)
5232{
5233    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5234    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5235    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5236
5237    pwd->h[0]  = msa_srlr_df(DF_HALF, pws->h[0],  pwt->h[0]);
5238    pwd->h[1]  = msa_srlr_df(DF_HALF, pws->h[1],  pwt->h[1]);
5239    pwd->h[2]  = msa_srlr_df(DF_HALF, pws->h[2],  pwt->h[2]);
5240    pwd->h[3]  = msa_srlr_df(DF_HALF, pws->h[3],  pwt->h[3]);
5241    pwd->h[4]  = msa_srlr_df(DF_HALF, pws->h[4],  pwt->h[4]);
5242    pwd->h[5]  = msa_srlr_df(DF_HALF, pws->h[5],  pwt->h[5]);
5243    pwd->h[6]  = msa_srlr_df(DF_HALF, pws->h[6],  pwt->h[6]);
5244    pwd->h[7]  = msa_srlr_df(DF_HALF, pws->h[7],  pwt->h[7]);
5245}
5246
5247void helper_msa_srlr_w(CPUMIPSState *env,
5248                       uint32_t wd, uint32_t ws, uint32_t wt)
5249{
5250    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5251    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5252    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5253
5254    pwd->w[0]  = msa_srlr_df(DF_WORD, pws->w[0],  pwt->w[0]);
5255    pwd->w[1]  = msa_srlr_df(DF_WORD, pws->w[1],  pwt->w[1]);
5256    pwd->w[2]  = msa_srlr_df(DF_WORD, pws->w[2],  pwt->w[2]);
5257    pwd->w[3]  = msa_srlr_df(DF_WORD, pws->w[3],  pwt->w[3]);
5258}
5259
5260void helper_msa_srlr_d(CPUMIPSState *env,
5261                       uint32_t wd, uint32_t ws, uint32_t wt)
5262{
5263    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5264    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5265    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
5266
5267    pwd->d[0]  = msa_srlr_df(DF_DOUBLE, pws->d[0],  pwt->d[0]);
5268    pwd->d[1]  = msa_srlr_df(DF_DOUBLE, pws->d[1],  pwt->d[1]);
5269}
5270
5271
5272#define MSA_FN_IMM8(FUNC, DEST, OPERATION)                              \
5273void helper_msa_ ## FUNC(CPUMIPSState *env, uint32_t wd, uint32_t ws,   \
5274        uint32_t i8)                                                    \
5275{                                                                       \
5276    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5277    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5278    uint32_t i;                                                         \
5279    for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                        \
5280        DEST = OPERATION;                                               \
5281    }                                                                   \
5282}
5283
5284MSA_FN_IMM8(andi_b, pwd->b[i], pws->b[i] & i8)
5285MSA_FN_IMM8(ori_b, pwd->b[i], pws->b[i] | i8)
5286MSA_FN_IMM8(nori_b, pwd->b[i], ~(pws->b[i] | i8))
5287MSA_FN_IMM8(xori_b, pwd->b[i], pws->b[i] ^ i8)
5288
5289#define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
5290            UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
5291MSA_FN_IMM8(bmnzi_b, pwd->b[i],
5292        BIT_MOVE_IF_NOT_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
5293
5294#define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
5295            UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
5296MSA_FN_IMM8(bmzi_b, pwd->b[i],
5297        BIT_MOVE_IF_ZERO(pwd->b[i], pws->b[i], i8, DF_BYTE))
5298
5299#define BIT_SELECT(dest, arg1, arg2, df) \
5300            UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
5301MSA_FN_IMM8(bseli_b, pwd->b[i],
5302        BIT_SELECT(pwd->b[i], pws->b[i], i8, DF_BYTE))
5303
5304#undef BIT_SELECT
5305#undef BIT_MOVE_IF_ZERO
5306#undef BIT_MOVE_IF_NOT_ZERO
5307#undef MSA_FN_IMM8
5308
5309#define SHF_POS(i, imm) (((i) & 0xfc) + (((imm) >> (2 * ((i) & 0x03))) & 0x03))
5310
5311void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5312                       uint32_t ws, uint32_t imm)
5313{
5314    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5315    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5316    wr_t wx, *pwx = &wx;
5317    uint32_t i;
5318
5319    switch (df) {
5320    case DF_BYTE:
5321        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5322            pwx->b[i] = pws->b[SHF_POS(i, imm)];
5323        }
5324        break;
5325    case DF_HALF:
5326        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5327            pwx->h[i] = pws->h[SHF_POS(i, imm)];
5328        }
5329        break;
5330    case DF_WORD:
5331        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5332            pwx->w[i] = pws->w[SHF_POS(i, imm)];
5333        }
5334        break;
5335    default:
5336        assert(0);
5337    }
5338    msa_move_v(pwd, pwx);
5339}
5340
5341#define MSA_BINOP_IMM_DF(helper, func)                                  \
5342void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
5343                        uint32_t wd, uint32_t ws, int32_t u5)           \
5344{                                                                       \
5345    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5346    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5347    uint32_t i;                                                         \
5348                                                                        \
5349    switch (df) {                                                       \
5350    case DF_BYTE:                                                       \
5351        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5352            pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
5353        }                                                               \
5354        break;                                                          \
5355    case DF_HALF:                                                       \
5356        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5357            pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
5358        }                                                               \
5359        break;                                                          \
5360    case DF_WORD:                                                       \
5361        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5362            pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
5363        }                                                               \
5364        break;                                                          \
5365    case DF_DOUBLE:                                                     \
5366        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5367            pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
5368        }                                                               \
5369        break;                                                          \
5370    default:                                                            \
5371        assert(0);                                                      \
5372    }                                                                   \
5373}
5374
5375MSA_BINOP_IMM_DF(addvi, addv)
5376MSA_BINOP_IMM_DF(subvi, subv)
5377MSA_BINOP_IMM_DF(ceqi, ceq)
5378MSA_BINOP_IMM_DF(clei_s, cle_s)
5379MSA_BINOP_IMM_DF(clei_u, cle_u)
5380MSA_BINOP_IMM_DF(clti_s, clt_s)
5381MSA_BINOP_IMM_DF(clti_u, clt_u)
5382MSA_BINOP_IMM_DF(maxi_s, max_s)
5383MSA_BINOP_IMM_DF(maxi_u, max_u)
5384MSA_BINOP_IMM_DF(mini_s, min_s)
5385MSA_BINOP_IMM_DF(mini_u, min_u)
5386#undef MSA_BINOP_IMM_DF
5387
5388void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5389                       int32_t s10)
5390{
5391    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5392    uint32_t i;
5393
5394    switch (df) {
5395    case DF_BYTE:
5396        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5397            pwd->b[i] = (int8_t)s10;
5398        }
5399        break;
5400    case DF_HALF:
5401        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5402            pwd->h[i] = (int16_t)s10;
5403        }
5404        break;
5405    case DF_WORD:
5406        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5407            pwd->w[i] = (int32_t)s10;
5408        }
5409        break;
5410    case DF_DOUBLE:
5411        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
5412            pwd->d[i] = (int64_t)s10;
5413        }
5414       break;
5415    default:
5416        assert(0);
5417    }
5418}
5419
/*
 * Clamp arg to the range of a signed integer that is (m + 1) bits wide,
 * i.e. to [M_MIN_INT(m + 1), M_MAX_INT(m + 1)].
 *
 * df is accepted for signature compatibility with the other per-element
 * functions but is not used.
 */
static inline int64_t msa_sat_s_df(uint32_t df, int64_t arg, uint32_t m)
{
    const int64_t floor = M_MIN_INT(m + 1);
    const int64_t ceiling = M_MAX_INT(m + 1);

    if (arg < floor) {
        return floor;
    }
    if (arg > ceiling) {
        return ceiling;
    }
    return arg;
}
5426
/*
 * Clamp arg, interpreted as an unsigned element of format df, to the largest
 * value an unsigned integer of (m + 1) bits can hold, M_MAX_UINT(m + 1).
 *
 * arg is masked to the element width with UNSIGNED(arg, df) before the
 * comparison, which discards any sign-extension bits.
 */
static inline int64_t msa_sat_u_df(uint32_t df, int64_t arg, uint32_t m)
{
    const uint64_t ceiling = M_MAX_UINT(m + 1);
    const uint64_t value = UNSIGNED(arg, df);

    if (value < ceiling) {
        return value;
    }
    return ceiling;
}
5433
5434#define MSA_BINOP_IMMU_DF(helper, func)                                  \
5435void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd, \
5436                       uint32_t ws, uint32_t u5)                        \
5437{                                                                       \
5438    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5439    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5440    uint32_t i;                                                         \
5441                                                                        \
5442    switch (df) {                                                       \
5443    case DF_BYTE:                                                       \
5444        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5445            pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5);         \
5446        }                                                               \
5447        break;                                                          \
5448    case DF_HALF:                                                       \
5449        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5450            pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5);         \
5451        }                                                               \
5452        break;                                                          \
5453    case DF_WORD:                                                       \
5454        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5455            pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5);         \
5456        }                                                               \
5457        break;                                                          \
5458    case DF_DOUBLE:                                                     \
5459        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5460            pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5);         \
5461        }                                                               \
5462        break;                                                          \
5463    default:                                                            \
5464        assert(0);                                                      \
5465    }                                                                   \
5466}
5467
5468MSA_BINOP_IMMU_DF(slli, sll)
5469MSA_BINOP_IMMU_DF(srai, sra)
5470MSA_BINOP_IMMU_DF(srli, srl)
5471MSA_BINOP_IMMU_DF(bclri, bclr)
5472MSA_BINOP_IMMU_DF(bseti, bset)
5473MSA_BINOP_IMMU_DF(bnegi, bneg)
5474MSA_BINOP_IMMU_DF(sat_s, sat_s)
5475MSA_BINOP_IMMU_DF(sat_u, sat_u)
5476MSA_BINOP_IMMU_DF(srari, srar)
5477MSA_BINOP_IMMU_DF(srlri, srlr)
5478#undef MSA_BINOP_IMMU_DF
5479
5480#define MSA_TEROP_IMMU_DF(helper, func)                                  \
5481void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df,       \
5482                                  uint32_t wd, uint32_t ws, uint32_t u5) \
5483{                                                                       \
5484    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5485    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5486    uint32_t i;                                                         \
5487                                                                        \
5488    switch (df) {                                                       \
5489    case DF_BYTE:                                                       \
5490        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {                    \
5491            pwd->b[i] = msa_ ## func ## _df(df, pwd->b[i], pws->b[i],   \
5492                                            u5);                        \
5493        }                                                               \
5494        break;                                                          \
5495    case DF_HALF:                                                       \
5496        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {                    \
5497            pwd->h[i] = msa_ ## func ## _df(df, pwd->h[i], pws->h[i],   \
5498                                            u5);                        \
5499        }                                                               \
5500        break;                                                          \
5501    case DF_WORD:                                                       \
5502        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {                    \
5503            pwd->w[i] = msa_ ## func ## _df(df, pwd->w[i], pws->w[i],   \
5504                                            u5);                        \
5505        }                                                               \
5506        break;                                                          \
5507    case DF_DOUBLE:                                                     \
5508        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {                  \
5509            pwd->d[i] = msa_ ## func ## _df(df, pwd->d[i], pws->d[i],   \
5510                                            u5);                        \
5511        }                                                               \
5512        break;                                                          \
5513    default:                                                            \
5514        assert(0);                                                      \
5515    }                                                                   \
5516}
5517
5518MSA_TEROP_IMMU_DF(binsli, binsl)
5519MSA_TEROP_IMMU_DF(binsri, binsr)
5520#undef MSA_TEROP_IMMU_DF
5521
5522#define CONCATENATE_AND_SLIDE(s, k)             \
5523    do {                                        \
5524        for (i = 0; i < s; i++) {               \
5525            v[i]     = pws->b[s * k + i];       \
5526            v[i + s] = pwd->b[s * k + i];       \
5527        }                                       \
5528        for (i = 0; i < s; i++) {               \
5529            pwd->b[s * k + i] = v[i + n];       \
5530        }                                       \
5531    } while (0)
5532
5533static inline void msa_sld_df(uint32_t df, wr_t *pwd,
5534                              wr_t *pws, target_ulong rt)
5535{
5536    uint32_t n = rt % DF_ELEMENTS(df);
5537    uint8_t v[64];
5538    uint32_t i, k;
5539
5540    switch (df) {
5541    case DF_BYTE:
5542        CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_BYTE), 0);
5543        break;
5544    case DF_HALF:
5545        for (k = 0; k < 2; k++) {
5546            CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_HALF), k);
5547        }
5548        break;
5549    case DF_WORD:
5550        for (k = 0; k < 4; k++) {
5551            CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_WORD), k);
5552        }
5553        break;
5554    case DF_DOUBLE:
5555        for (k = 0; k < 8; k++) {
5556            CONCATENATE_AND_SLIDE(DF_ELEMENTS(DF_DOUBLE), k);
5557        }
5558        break;
5559    default:
5560        assert(0);
5561    }
5562}
5563
/*
 * Multiply two elements and scale the product down by DF_BITS(df) - 1 bits.
 *
 * The only special case is both operands being the minimum value of the
 * format, DF_MIN_INT(df); that combination returns DF_MAX_INT(df) instead of
 * the scaled product.
 */
static inline int64_t msa_mul_q_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    const int64_t lowest = DF_MIN_INT(df);

    if (arg1 == lowest && arg2 == lowest) {
        return DF_MAX_INT(df);
    }

    return (arg1 * arg2) >> (DF_BITS(df) - 1);
}
5574
/*
 * Multiply two elements, add a rounding constant and scale the result down
 * by DF_BITS(df) - 1 bits.
 *
 * The rounding constant is 1 << (DF_BITS(df) - 2), i.e. half of the weight
 * of the lowest bit kept after the final shift.  Both operands being
 * DF_MIN_INT(df) is special-cased and returns DF_MAX_INT(df).
 */
static inline int64_t msa_mulr_q_df(uint32_t df, int64_t arg1, int64_t arg2)
{
    int64_t q_min = DF_MIN_INT(df);
    int64_t q_max = DF_MAX_INT(df);
    /*
     * Shift a 64-bit one: a plain int constant cannot be shifted by 62,
     * which is the count when DF_BITS(df) is 64.
     */
    int64_t r_bit = 1LL << (DF_BITS(df) - 2);

    if (arg1 == q_min && arg2 == q_min) {
        return q_max;
    }
    return (arg1 * arg2 + r_bit) >> (DF_BITS(df) - 1);
}
5586
5587#define MSA_BINOP_DF(func) \
5588void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df,         \
5589                                uint32_t wd, uint32_t ws, uint32_t wt)  \
5590{                                                                       \
5591    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                          \
5592    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                          \
5593    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                          \
5594                                                                        \
5595    switch (df) {                                                       \
5596    case DF_BYTE:                                                       \
5597        pwd->b[0]  = msa_ ## func ## _df(df, pws->b[0],  pwt->b[0]);    \
5598        pwd->b[1]  = msa_ ## func ## _df(df, pws->b[1],  pwt->b[1]);    \
5599        pwd->b[2]  = msa_ ## func ## _df(df, pws->b[2],  pwt->b[2]);    \
5600        pwd->b[3]  = msa_ ## func ## _df(df, pws->b[3],  pwt->b[3]);    \
5601        pwd->b[4]  = msa_ ## func ## _df(df, pws->b[4],  pwt->b[4]);    \
5602        pwd->b[5]  = msa_ ## func ## _df(df, pws->b[5],  pwt->b[5]);    \
5603        pwd->b[6]  = msa_ ## func ## _df(df, pws->b[6],  pwt->b[6]);    \
5604        pwd->b[7]  = msa_ ## func ## _df(df, pws->b[7],  pwt->b[7]);    \
5605        pwd->b[8]  = msa_ ## func ## _df(df, pws->b[8],  pwt->b[8]);    \
5606        pwd->b[9]  = msa_ ## func ## _df(df, pws->b[9],  pwt->b[9]);    \
5607        pwd->b[10] = msa_ ## func ## _df(df, pws->b[10], pwt->b[10]);   \
5608        pwd->b[11] = msa_ ## func ## _df(df, pws->b[11], pwt->b[11]);   \
5609        pwd->b[12] = msa_ ## func ## _df(df, pws->b[12], pwt->b[12]);   \
5610        pwd->b[13] = msa_ ## func ## _df(df, pws->b[13], pwt->b[13]);   \
5611        pwd->b[14] = msa_ ## func ## _df(df, pws->b[14], pwt->b[14]);   \
5612        pwd->b[15] = msa_ ## func ## _df(df, pws->b[15], pwt->b[15]);   \
5613        break;                                                          \
5614    case DF_HALF:                                                       \
5615        pwd->h[0] = msa_ ## func ## _df(df, pws->h[0], pwt->h[0]);      \
5616        pwd->h[1] = msa_ ## func ## _df(df, pws->h[1], pwt->h[1]);      \
5617        pwd->h[2] = msa_ ## func ## _df(df, pws->h[2], pwt->h[2]);      \
5618        pwd->h[3] = msa_ ## func ## _df(df, pws->h[3], pwt->h[3]);      \
5619        pwd->h[4] = msa_ ## func ## _df(df, pws->h[4], pwt->h[4]);      \
5620        pwd->h[5] = msa_ ## func ## _df(df, pws->h[5], pwt->h[5]);      \
5621        pwd->h[6] = msa_ ## func ## _df(df, pws->h[6], pwt->h[6]);      \
5622        pwd->h[7] = msa_ ## func ## _df(df, pws->h[7], pwt->h[7]);      \
5623        break;                                                          \
5624    case DF_WORD:                                                       \
5625        pwd->w[0] = msa_ ## func ## _df(df, pws->w[0], pwt->w[0]);      \
5626        pwd->w[1] = msa_ ## func ## _df(df, pws->w[1], pwt->w[1]);      \
5627        pwd->w[2] = msa_ ## func ## _df(df, pws->w[2], pwt->w[2]);      \
5628        pwd->w[3] = msa_ ## func ## _df(df, pws->w[3], pwt->w[3]);      \
5629        break;                                                          \
5630    case DF_DOUBLE:                                                     \
5631        pwd->d[0] = msa_ ## func ## _df(df, pws->d[0], pwt->d[0]);      \
5632        pwd->d[1] = msa_ ## func ## _df(df, pws->d[1], pwt->d[1]);      \
5633        break;                                                          \
5634    default:                                                            \
5635        assert(0);                                                      \
5636    }                                                                   \
5637}
5638
5639MSA_BINOP_DF(mul_q)
5640MSA_BINOP_DF(mulr_q)
5641#undef MSA_BINOP_DF
5642
5643void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5644                       uint32_t ws, uint32_t rt)
5645{
5646    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5647    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5648
5649    msa_sld_df(df, pwd, pws, env->active_tc.gpr[rt]);
5650}
5651
/*
 * Multiply-accumulate with saturation.
 *
 * dest is scaled up by DF_BITS(df) - 1 bits, the product arg1 * arg2 is
 * added, and the sum is scaled back down by the same amount.  The result is
 * clamped to [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_madd_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                    int64_t arg2)
{
    const int64_t floor = DF_MIN_INT(df);
    const int64_t ceiling = DF_MAX_INT(df);
    const int64_t product = arg1 * arg2;
    const int64_t sum =
        ((dest << (DF_BITS(df) - 1)) + product) >> (DF_BITS(df) - 1);

    if (sum < floor) {
        return floor;
    }
    if (ceiling < sum) {
        return ceiling;
    }
    return sum;
}
5665
/*
 * Fixed-point multiply-subtract for a single element.
 *
 * dest is shifted left by (DF_BITS(df) - 1) bits, the product arg1 * arg2
 * is subtracted, and the difference is shifted back right by the same
 * amount.  The result is clamped to [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_msub_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                    int64_t arg2)
{
    const int frac_bits = DF_BITS(df) - 1;
    const int64_t hi = DF_MAX_INT(df);
    const int64_t lo = DF_MIN_INT(df);
    const int64_t product = arg1 * arg2;
    const int64_t diff = ((dest << frac_bits) - product) >> frac_bits;

    if (diff < lo) {
        return lo;
    }
    if (hi < diff) {
        return hi;
    }
    return diff;
}
5679
/*
 * Fixed-point multiply-add with rounding for a single element.
 *
 * Same computation as the non-rounding variant, except that a rounding
 * bit (1 << (DF_BITS(df) - 2)) is added before the final right shift.
 * The result is clamped to [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_maddr_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t q_prod, q_ret;

    int64_t q_max = DF_MAX_INT(df);
    int64_t q_min = DF_MIN_INT(df);
    /*
     * Build the rounding bit with a 64-bit shift: a plain "1 <<" is an int
     * shift and is undefined once the shift count reaches the width of int
     * (DF_DOUBLE would need a shift by 62).
     */
    int64_t r_bit = 1LL << (DF_BITS(df) - 2);

    q_prod = arg1 * arg2;
    q_ret = ((dest << (DF_BITS(df) - 1)) + q_prod + r_bit) >> (DF_BITS(df) - 1);

    return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
}
5694
/*
 * Fixed-point multiply-subtract with rounding for a single element.
 *
 * Same computation as the non-rounding variant, except that a rounding
 * bit (1 << (DF_BITS(df) - 2)) is added before the final right shift.
 * The result is clamped to [DF_MIN_INT(df), DF_MAX_INT(df)].
 */
static inline int64_t msa_msubr_q_df(uint32_t df, int64_t dest, int64_t arg1,
                                     int64_t arg2)
{
    int64_t q_prod, q_ret;

    int64_t q_max = DF_MAX_INT(df);
    int64_t q_min = DF_MIN_INT(df);
    /*
     * Build the rounding bit with a 64-bit shift: a plain "1 <<" is an int
     * shift and is undefined once the shift count reaches the width of int
     * (DF_DOUBLE would need a shift by 62).
     */
    int64_t r_bit = 1LL << (DF_BITS(df) - 2);

    q_prod = arg1 * arg2;
    q_ret = ((dest << (DF_BITS(df) - 1)) - q_prod + r_bit) >> (DF_BITS(df) - 1);

    return (q_ret < q_min) ? q_min : (q_max < q_ret) ? q_max : q_ret;
}
5709
5710#define MSA_TEROP_DF(func) \
5711void helper_msa_ ## func ## _df(CPUMIPSState *env, uint32_t df, uint32_t wd,  \
5712                                uint32_t ws, uint32_t wt)                     \
5713{                                                                             \
5714    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                                \
5715    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                                \
5716    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                                \
5717                                                                              \
5718    switch (df) {                                                             \
5719    case DF_BYTE:                                                             \
5720        pwd->b[0]  = msa_ ## func ## _df(df, pwd->b[0],  pws->b[0],           \
5721                                             pwt->b[0]);                      \
5722        pwd->b[1]  = msa_ ## func ## _df(df, pwd->b[1],  pws->b[1],           \
5723                                             pwt->b[1]);                      \
5724        pwd->b[2]  = msa_ ## func ## _df(df, pwd->b[2],  pws->b[2],           \
5725                                             pwt->b[2]);                      \
5726        pwd->b[3]  = msa_ ## func ## _df(df, pwd->b[3],  pws->b[3],           \
5727                                             pwt->b[3]);                      \
5728        pwd->b[4]  = msa_ ## func ## _df(df, pwd->b[4],  pws->b[4],           \
5729                                             pwt->b[4]);                      \
5730        pwd->b[5]  = msa_ ## func ## _df(df, pwd->b[5],  pws->b[5],           \
5731                                             pwt->b[5]);                      \
5732        pwd->b[6]  = msa_ ## func ## _df(df, pwd->b[6],  pws->b[6],           \
5733                                             pwt->b[6]);                      \
5734        pwd->b[7]  = msa_ ## func ## _df(df, pwd->b[7],  pws->b[7],           \
5735                                             pwt->b[7]);                      \
5736        pwd->b[8]  = msa_ ## func ## _df(df, pwd->b[8],  pws->b[8],           \
5737                                             pwt->b[8]);                      \
5738        pwd->b[9]  = msa_ ## func ## _df(df, pwd->b[9],  pws->b[9],           \
5739                                             pwt->b[9]);                      \
5740        pwd->b[10] = msa_ ## func ## _df(df, pwd->b[10], pws->b[10],          \
5741                                             pwt->b[10]);                     \
5742        pwd->b[11] = msa_ ## func ## _df(df, pwd->b[11], pws->b[11],          \
5743                                             pwt->b[11]);                     \
5744        pwd->b[12] = msa_ ## func ## _df(df, pwd->b[12], pws->b[12],          \
5745                                             pwt->b[12]);                     \
5746        pwd->b[13] = msa_ ## func ## _df(df, pwd->b[13], pws->b[13],          \
5747                                             pwt->b[13]);                     \
5748        pwd->b[14] = msa_ ## func ## _df(df, pwd->b[14], pws->b[14],          \
5749                                             pwt->b[14]);                     \
5750        pwd->b[15] = msa_ ## func ## _df(df, pwd->b[15], pws->b[15],          \
5751                                             pwt->b[15]);                     \
5752        break;                                                                \
5753    case DF_HALF:                                                             \
5754        pwd->h[0] = msa_ ## func ## _df(df, pwd->h[0], pws->h[0], pwt->h[0]); \
5755        pwd->h[1] = msa_ ## func ## _df(df, pwd->h[1], pws->h[1], pwt->h[1]); \
5756        pwd->h[2] = msa_ ## func ## _df(df, pwd->h[2], pws->h[2], pwt->h[2]); \
5757        pwd->h[3] = msa_ ## func ## _df(df, pwd->h[3], pws->h[3], pwt->h[3]); \
5758        pwd->h[4] = msa_ ## func ## _df(df, pwd->h[4], pws->h[4], pwt->h[4]); \
5759        pwd->h[5] = msa_ ## func ## _df(df, pwd->h[5], pws->h[5], pwt->h[5]); \
5760        pwd->h[6] = msa_ ## func ## _df(df, pwd->h[6], pws->h[6], pwt->h[6]); \
5761        pwd->h[7] = msa_ ## func ## _df(df, pwd->h[7], pws->h[7], pwt->h[7]); \
5762        break;                                                                \
5763    case DF_WORD:                                                             \
5764        pwd->w[0] = msa_ ## func ## _df(df, pwd->w[0], pws->w[0], pwt->w[0]); \
5765        pwd->w[1] = msa_ ## func ## _df(df, pwd->w[1], pws->w[1], pwt->w[1]); \
5766        pwd->w[2] = msa_ ## func ## _df(df, pwd->w[2], pws->w[2], pwt->w[2]); \
5767        pwd->w[3] = msa_ ## func ## _df(df, pwd->w[3], pws->w[3], pwt->w[3]); \
5768        break;                                                                \
5769    case DF_DOUBLE:                                                           \
5770        pwd->d[0] = msa_ ## func ## _df(df, pwd->d[0], pws->d[0], pwt->d[0]); \
5771        pwd->d[1] = msa_ ## func ## _df(df, pwd->d[1], pws->d[1], pwt->d[1]); \
5772        break;                                                                \
5773    default:                                                                  \
5774        assert(0);                                                            \
5775    }                                                                         \
5776}
5777
5778MSA_TEROP_DF(binsl)
5779MSA_TEROP_DF(binsr)
5780MSA_TEROP_DF(madd_q)
5781MSA_TEROP_DF(msub_q)
5782MSA_TEROP_DF(maddr_q)
5783MSA_TEROP_DF(msubr_q)
5784#undef MSA_TEROP_DF
5785
5786static inline void msa_splat_df(uint32_t df, wr_t *pwd,
5787                                wr_t *pws, target_ulong rt)
5788{
5789    uint32_t n = rt % DF_ELEMENTS(df);
5790    uint32_t i;
5791
5792    switch (df) {
5793    case DF_BYTE:
5794        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
5795            pwd->b[i] = pws->b[n];
5796        }
5797        break;
5798    case DF_HALF:
5799        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
5800            pwd->h[i] = pws->h[n];
5801        }
5802        break;
5803    case DF_WORD:
5804        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
5805            pwd->w[i] = pws->w[n];
5806        }
5807        break;
5808    case DF_DOUBLE:
5809        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
5810            pwd->d[i] = pws->d[n];
5811        }
5812       break;
5813    default:
5814        assert(0);
5815    }
5816}
5817
5818void helper_msa_splat_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5819                         uint32_t ws, uint32_t rt)
5820{
5821    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5822    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5823
5824    msa_splat_df(df, pwd, pws, env->active_tc.gpr[rt]);
5825}
5826
/*
 * Macro scaffolding for generating a helper that dispatches on the data
 * format.  The user defines MSA_DO(DF) (the per-element statement, where
 * DF is the wr_t member b/h/w/d) and MSA_LOOP_COND(DF) (the iteration
 * count) and then instantiates MSA_FN_DF(name).
 */

/* Per-format element statement: MSA_DO applied to the matching wr_t member */
#define MSA_DO_B MSA_DO(b)
#define MSA_DO_H MSA_DO(h)
#define MSA_DO_W MSA_DO(w)
#define MSA_DO_D MSA_DO(d)

/* Per-format loop: MSA_LOOP applied to the matching format suffix */
#define MSA_LOOP_B MSA_LOOP(B)
#define MSA_LOOP_H MSA_LOOP(H)
#define MSA_LOOP_W MSA_LOOP(W)
#define MSA_LOOP_D MSA_LOOP(D)

/* Per-format iteration count: MSA_LOOP_COND applied to the DF_* constant */
#define MSA_LOOP_COND_B MSA_LOOP_COND(DF_BYTE)
#define MSA_LOOP_COND_H MSA_LOOP_COND(DF_HALF)
#define MSA_LOOP_COND_W MSA_LOOP_COND(DF_WORD)
#define MSA_LOOP_COND_D MSA_LOOP_COND(DF_DOUBLE)

/*
 * Run MSA_DO_<DF> for i = 0 .. MSA_LOOP_COND_<DF> - 1.  Expects a variable
 * named i to be declared at the expansion site.
 */
#define MSA_LOOP(DF) \
    do { \
        for (i = 0; i < (MSA_LOOP_COND_ ## DF) ; i++) { \
            MSA_DO_ ## DF; \
        } \
    } while (0)

/*
 * Define helper_msa_<FUNC>(): select the loop matching df, let MSA_DO
 * build the result in the local temporary wx (through pwx), then copy wx
 * to the destination register with msa_move_v().  An unknown df trips the
 * assertion.
 */
#define MSA_FN_DF(FUNC)                                             \
void helper_msa_##FUNC(CPUMIPSState *env, uint32_t df, uint32_t wd, \
        uint32_t ws, uint32_t wt)                                   \
{                                                                   \
    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);                      \
    wr_t *pws = &(env->active_fpu.fpr[ws].wr);                      \
    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);                      \
    wr_t wx, *pwx = &wx;                                            \
    uint32_t i;                                                     \
    switch (df) {                                                   \
    case DF_BYTE:                                                   \
        MSA_LOOP_B;                                                 \
        break;                                                      \
    case DF_HALF:                                                   \
        MSA_LOOP_H;                                                 \
        break;                                                      \
    case DF_WORD:                                                   \
        MSA_LOOP_W;                                                 \
        break;                                                      \
    case DF_DOUBLE:                                                 \
        MSA_LOOP_D;                                                 \
        break;                                                      \
    default:                                                        \
        assert(0);                                                  \
    }                                                               \
    msa_move_v(pwd, pwx);                                           \
}

/* Iteration count covering half of the elements of a register */
#define MSA_LOOP_COND(DF) \
            (DF_ELEMENTS(DF) / 2)

/*
 * Element accessors: R<x> addresses element i directly, L<x> addresses
 * the element half a register further on (i + element count / 2).
 */
#define Rb(pwr, i) (pwr->b[i])
#define Lb(pwr, i) (pwr->b[i + DF_ELEMENTS(DF_BYTE) / 2])
#define Rh(pwr, i) (pwr->h[i])
#define Lh(pwr, i) (pwr->h[i + DF_ELEMENTS(DF_HALF) / 2])
#define Rw(pwr, i) (pwr->w[i])
#define Lw(pwr, i) (pwr->w[i + DF_ELEMENTS(DF_WORD) / 2])
#define Rd(pwr, i) (pwr->d[i])
#define Ld(pwr, i) (pwr->d[i + DF_ELEMENTS(DF_DOUBLE) / 2])

#undef MSA_LOOP_COND

/* Iteration count covering every element of a register */
#define MSA_LOOP_COND(DF) \
            (DF_ELEMENTS(DF))

/*
 * Per-element body of the vshf_df helper.  The current value of
 * destination element i acts as the control value:
 *  - if any of bits 0xc0 is set, the result element is 0;
 *  - otherwise k = (control & 0x3f) % (2 * n), with n the element count of
 *    df, selects wt element k when k < n and ws element k - n otherwise.
 * Results go to the temporary (pwx), so the control values in wd are not
 * overwritten while the loop is still reading them.
 */
#define MSA_DO(DF)                                                          \
    do {                                                                    \
        uint32_t n = DF_ELEMENTS(df);                                       \
        uint32_t k = (pwd->DF[i] & 0x3f) % (2 * n);                         \
        pwx->DF[i] =                                                        \
            (pwd->DF[i] & 0xc0) ? 0 : k < n ? pwt->DF[k] : pws->DF[k - n];  \
    } while (0)
MSA_FN_DF(vshf_df)
#undef MSA_DO
#undef MSA_LOOP_COND
#undef MSA_FN_DF
5905
5906
5907void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5908                        uint32_t ws, uint32_t n)
5909{
5910    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5911    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5912
5913    msa_sld_df(df, pwd, pws, n);
5914}
5915
5916void helper_msa_splati_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
5917                          uint32_t ws, uint32_t n)
5918{
5919    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
5920    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
5921
5922    msa_splat_df(df, pwd, pws, n);
5923}
5924
5925void helper_msa_copy_s_b(CPUMIPSState *env, uint32_t rd,
5926                         uint32_t ws, uint32_t n)
5927{
5928    n %= 16;
5929#if defined(HOST_WORDS_BIGENDIAN)
5930    if (n < 8) {
5931        n = 8 - n - 1;
5932    } else {
5933        n = 24 - n - 1;
5934    }
5935#endif
5936    env->active_tc.gpr[rd] = (int8_t)env->active_fpu.fpr[ws].wr.b[n];
5937}
5938
5939void helper_msa_copy_s_h(CPUMIPSState *env, uint32_t rd,
5940                         uint32_t ws, uint32_t n)
5941{
5942    n %= 8;
5943#if defined(HOST_WORDS_BIGENDIAN)
5944    if (n < 4) {
5945        n = 4 - n - 1;
5946    } else {
5947        n = 12 - n - 1;
5948    }
5949#endif
5950    env->active_tc.gpr[rd] = (int16_t)env->active_fpu.fpr[ws].wr.h[n];
5951}
5952
5953void helper_msa_copy_s_w(CPUMIPSState *env, uint32_t rd,
5954                         uint32_t ws, uint32_t n)
5955{
5956    n %= 4;
5957#if defined(HOST_WORDS_BIGENDIAN)
5958    if (n < 2) {
5959        n = 2 - n - 1;
5960    } else {
5961        n = 6 - n - 1;
5962    }
5963#endif
5964    env->active_tc.gpr[rd] = (int32_t)env->active_fpu.fpr[ws].wr.w[n];
5965}
5966
5967void helper_msa_copy_s_d(CPUMIPSState *env, uint32_t rd,
5968                         uint32_t ws, uint32_t n)
5969{
5970    n %= 2;
5971    env->active_tc.gpr[rd] = (int64_t)env->active_fpu.fpr[ws].wr.d[n];
5972}
5973
5974void helper_msa_copy_u_b(CPUMIPSState *env, uint32_t rd,
5975                         uint32_t ws, uint32_t n)
5976{
5977    n %= 16;
5978#if defined(HOST_WORDS_BIGENDIAN)
5979    if (n < 8) {
5980        n = 8 - n - 1;
5981    } else {
5982        n = 24 - n - 1;
5983    }
5984#endif
5985    env->active_tc.gpr[rd] = (uint8_t)env->active_fpu.fpr[ws].wr.b[n];
5986}
5987
5988void helper_msa_copy_u_h(CPUMIPSState *env, uint32_t rd,
5989                         uint32_t ws, uint32_t n)
5990{
5991    n %= 8;
5992#if defined(HOST_WORDS_BIGENDIAN)
5993    if (n < 4) {
5994        n = 4 - n - 1;
5995    } else {
5996        n = 12 - n - 1;
5997    }
5998#endif
5999    env->active_tc.gpr[rd] = (uint16_t)env->active_fpu.fpr[ws].wr.h[n];
6000}
6001
6002void helper_msa_copy_u_w(CPUMIPSState *env, uint32_t rd,
6003                         uint32_t ws, uint32_t n)
6004{
6005    n %= 4;
6006#if defined(HOST_WORDS_BIGENDIAN)
6007    if (n < 2) {
6008        n = 2 - n - 1;
6009    } else {
6010        n = 6 - n - 1;
6011    }
6012#endif
6013    env->active_tc.gpr[rd] = (uint32_t)env->active_fpu.fpr[ws].wr.w[n];
6014}
6015
6016void helper_msa_insert_b(CPUMIPSState *env, uint32_t wd,
6017                          uint32_t rs_num, uint32_t n)
6018{
6019    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6020    target_ulong rs = env->active_tc.gpr[rs_num];
6021    n %= 16;
6022#if defined(HOST_WORDS_BIGENDIAN)
6023    if (n < 8) {
6024        n = 8 - n - 1;
6025    } else {
6026        n = 24 - n - 1;
6027    }
6028#endif
6029    pwd->b[n] = (int8_t)rs;
6030}
6031
6032void helper_msa_insert_h(CPUMIPSState *env, uint32_t wd,
6033                          uint32_t rs_num, uint32_t n)
6034{
6035    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6036    target_ulong rs = env->active_tc.gpr[rs_num];
6037    n %= 8;
6038#if defined(HOST_WORDS_BIGENDIAN)
6039    if (n < 4) {
6040        n = 4 - n - 1;
6041    } else {
6042        n = 12 - n - 1;
6043    }
6044#endif
6045    pwd->h[n] = (int16_t)rs;
6046}
6047
6048void helper_msa_insert_w(CPUMIPSState *env, uint32_t wd,
6049                          uint32_t rs_num, uint32_t n)
6050{
6051    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6052    target_ulong rs = env->active_tc.gpr[rs_num];
6053    n %= 4;
6054#if defined(HOST_WORDS_BIGENDIAN)
6055    if (n < 2) {
6056        n = 2 - n - 1;
6057    } else {
6058        n = 6 - n - 1;
6059    }
6060#endif
6061    pwd->w[n] = (int32_t)rs;
6062}
6063
6064void helper_msa_insert_d(CPUMIPSState *env, uint32_t wd,
6065                          uint32_t rs_num, uint32_t n)
6066{
6067    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6068    target_ulong rs = env->active_tc.gpr[rs_num];
6069    n %= 2;
6070    pwd->d[n] = (int64_t)rs;
6071}
6072
6073void helper_msa_insve_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6074                         uint32_t ws, uint32_t n)
6075{
6076    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6077    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6078
6079    switch (df) {
6080    case DF_BYTE:
6081        pwd->b[n] = (int8_t)pws->b[0];
6082        break;
6083    case DF_HALF:
6084        pwd->h[n] = (int16_t)pws->h[0];
6085        break;
6086    case DF_WORD:
6087        pwd->w[n] = (int32_t)pws->w[0];
6088        break;
6089    case DF_DOUBLE:
6090        pwd->d[n] = (int64_t)pws->d[0];
6091        break;
6092    default:
6093        assert(0);
6094    }
6095}
6096
6097void helper_msa_ctcmsa(CPUMIPSState *env, target_ulong elm, uint32_t cd)
6098{
6099    switch (cd) {
6100    case 0:
6101        break;
6102    case 1:
6103        env->active_tc.msacsr = (int32_t)elm & MSACSR_MASK;
6104        restore_msa_fp_status(env);
6105        /* check exception */
6106        if ((GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)
6107            & GET_FP_CAUSE(env->active_tc.msacsr)) {
6108            do_raise_exception(env, EXCP_MSAFPE, GETPC());
6109        }
6110        break;
6111    }
6112}
6113
6114target_ulong helper_msa_cfcmsa(CPUMIPSState *env, uint32_t cs)
6115{
6116    switch (cs) {
6117    case 0:
6118        return env->msair;
6119    case 1:
6120        return env->active_tc.msacsr & MSACSR_MASK;
6121    }
6122    return 0;
6123}
6124
6125void helper_msa_fill_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6126                        uint32_t rs)
6127{
6128    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6129    uint32_t i;
6130
6131    switch (df) {
6132    case DF_BYTE:
6133        for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
6134            pwd->b[i] = (int8_t)env->active_tc.gpr[rs];
6135        }
6136        break;
6137    case DF_HALF:
6138        for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
6139            pwd->h[i] = (int16_t)env->active_tc.gpr[rs];
6140        }
6141        break;
6142    case DF_WORD:
6143        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6144            pwd->w[i] = (int32_t)env->active_tc.gpr[rs];
6145        }
6146        break;
6147    case DF_DOUBLE:
6148        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6149            pwd->d[i] = (int64_t)env->active_tc.gpr[rs];
6150        }
6151       break;
6152    default:
6153        assert(0);
6154    }
6155}
6156
6157
/* Bit patterns 0x3f800000 and 0x3ff0000000000000: the constant 1.0 */
#define FLOAT_ONE32 make_float32(0x3f8 << 20)
#define FLOAT_ONE64 make_float64(0x3ffULL << 52)

/*
 * Signaling-NaN patterns, obtained by XOR-ing a fixed mask into the
 * default NaN for float_status s.  The value quoted after each macro
 * presumably assumes the default NaN encodings 0x7e00, 0x7fc00000 and
 * 0x7ff8000000000000 - TODO confirm against the configured status.
 */
#define FLOAT_SNAN16(s) (float16_default_nan(s) ^ 0x0220)
        /* 0x7c20 */
#define FLOAT_SNAN32(s) (float32_default_nan(s) ^ 0x00400020)
        /* 0x7f800020 */
#define FLOAT_SNAN64(s) (float64_default_nan(s) ^ 0x0008000000000020ULL)
        /* 0x7ff0000000000020 */
6167
/* Reset the Cause field of msacsr to 0 */
static inline void clear_msacsr_cause(CPUMIPSState *env)
{
    SET_FP_CAUSE(env->active_tc.msacsr, 0);
}
6172
6173static inline void check_msacsr_cause(CPUMIPSState *env, uintptr_t retaddr)
6174{
6175    if ((GET_FP_CAUSE(env->active_tc.msacsr) &
6176            (GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED)) == 0) {
6177        UPDATE_FP_FLAGS(env->active_tc.msacsr,
6178                GET_FP_CAUSE(env->active_tc.msacsr));
6179    } else {
6180        do_raise_exception(env, EXCP_MSAFPE, retaddr);
6181    }
6182}
6183
/*
 * Action bits for update_msacsr() (may be OR-ed together):
 *  CLEAR_FS_UNDERFLOW - when an output was flushed to zero, clear
 *                       Underflow instead of setting it
 *  CLEAR_IS_INEXACT   - when an input was flushed to zero, clear
 *                       Inexact instead of setting it
 *  RECIPROCAL_INEXACT - report only Inexact unless Invalid or
 *                       Divide-by-zero was raised
 */
#define CLEAR_FS_UNDERFLOW 1
#define CLEAR_IS_INEXACT   2
#define RECIPROCAL_INEXACT 4
6188
6189
6190static inline int ieee_to_mips_xcpt_msa(int ieee_xcpt)
6191{
6192    int mips_xcpt = 0;
6193
6194    if (ieee_xcpt & float_flag_invalid) {
6195        mips_xcpt |= FP_INVALID;
6196    }
6197    if (ieee_xcpt & float_flag_overflow) {
6198        mips_xcpt |= FP_OVERFLOW;
6199    }
6200    if (ieee_xcpt & float_flag_underflow) {
6201        mips_xcpt |= FP_UNDERFLOW;
6202    }
6203    if (ieee_xcpt & float_flag_divbyzero) {
6204        mips_xcpt |= FP_DIV0;
6205    }
6206    if (ieee_xcpt & float_flag_inexact) {
6207        mips_xcpt |= FP_INEXACT;
6208    }
6209
6210    return mips_xcpt;
6211}
6212
/*
 * Convert the pending softfloat exception flags of msa_fp_status into
 * FP_* exception bits, apply the fix-ups below, and merge the result into
 * the Cause field of msacsr.
 *
 * action:   OR of CLEAR_FS_UNDERFLOW / CLEAR_IS_INEXACT / RECIPROCAL_INEXACT
 * denormal: when non-zero, underflow is treated as raised regardless of
 *           the softfloat flags
 *
 * Returns the adjusted FP_* exception bits.  The softfloat flags are only
 * read here; they are not cleared.
 */
static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
{
    int ieee_exception_flags;
    int mips_exception_flags = 0;
    int cause;
    int enable;

    ieee_exception_flags = get_float_exception_flags(
                               &env->active_tc.msa_fp_status);

    /* QEMU softfloat does not signal all underflow cases */
    if (denormal) {
        ieee_exception_flags |= float_flag_underflow;
    }
    if (ieee_exception_flags) {
        mips_exception_flags = ieee_to_mips_xcpt_msa(ieee_exception_flags);
    }
    /* Unimplemented is always treated as enabled */
    enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;

    /* Set Inexact (I) when flushing inputs to zero */
    if ((ieee_exception_flags & float_flag_input_denormal) &&
            (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
        if (action & CLEAR_IS_INEXACT) {
            mips_exception_flags &= ~FP_INEXACT;
        } else {
            mips_exception_flags |= FP_INEXACT;
        }
    }

    /* Set Inexact (I) and Underflow (U) when flushing outputs to zero */
    if ((ieee_exception_flags & float_flag_output_denormal) &&
            (env->active_tc.msacsr & MSACSR_FS_MASK) != 0) {
        mips_exception_flags |= FP_INEXACT;
        if (action & CLEAR_FS_UNDERFLOW) {
            mips_exception_flags &= ~FP_UNDERFLOW;
        } else {
            mips_exception_flags |= FP_UNDERFLOW;
        }
    }

    /* Set Inexact (I) when Overflow (O) is not enabled */
    if ((mips_exception_flags & FP_OVERFLOW) != 0 &&
           (enable & FP_OVERFLOW) == 0) {
        mips_exception_flags |= FP_INEXACT;
    }

    /* Clear Exact Underflow when Underflow (U) is not enabled */
    if ((mips_exception_flags & FP_UNDERFLOW) != 0 &&
           (enable & FP_UNDERFLOW) == 0 &&
           (mips_exception_flags & FP_INEXACT) == 0) {
        mips_exception_flags &= ~FP_UNDERFLOW;
    }

    /*
     * Reciprocal operations set only Inexact when valid and not
     * divide by zero
     */
    if ((action & RECIPROCAL_INEXACT) &&
            (mips_exception_flags & (FP_INVALID | FP_DIV0)) == 0) {
        mips_exception_flags = FP_INEXACT;
    }

    cause = mips_exception_flags & enable; /* all current enabled exceptions */

    if (cause == 0) {
        /*
         * No enabled exception, update the MSACSR Cause
         * with all current exceptions
         */
        SET_FP_CAUSE(env->active_tc.msacsr,
            (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
    } else {
        /* Current exceptions are enabled */
        if ((env->active_tc.msacsr & MSACSR_NX_MASK) == 0) {
            /*
             * Exception(s) will trap, update MSACSR Cause.
             * NOTE(review): the code ORs in every current exception
             * (mips_exception_flags), not just the enabled subset held
             * in cause - confirm this is the intended behavior.
             */
            SET_FP_CAUSE(env->active_tc.msacsr,
                (GET_FP_CAUSE(env->active_tc.msacsr) | mips_exception_flags));
        }
        /* With NX set, the Cause field is left unchanged */
    }

    return mips_exception_flags;
}
6298
6299static inline int get_enabled_exceptions(const CPUMIPSState *env, int c)
6300{
6301    int enable = GET_FP_ENABLE(env->active_tc.msacsr) | FP_UNIMPLEMENTED;
6302    return c & enable;
6303}
6304
6305static inline float16 float16_from_float32(int32_t a, bool ieee,
6306                                           float_status *status)
6307{
6308      float16 f_val;
6309
6310      f_val = float32_to_float16((float32)a, ieee, status);
6311
6312      return a < 0 ? (f_val | (1 << 15)) : f_val;
6313}
6314
6315static inline float32 float32_from_float64(int64_t a, float_status *status)
6316{
6317      float32 f_val;
6318
6319      f_val = float64_to_float32((float64)a, status);
6320
6321      return a < 0 ? (f_val | (1 << 31)) : f_val;
6322}
6323
6324static inline float32 float32_from_float16(int16_t a, bool ieee,
6325                                           float_status *status)
6326{
6327      float32 f_val;
6328
6329      f_val = float16_to_float32((float16)a, ieee, status);
6330
6331      return a < 0 ? (f_val | (1 << 31)) : f_val;
6332}
6333
6334static inline float64 float64_from_float32(int32_t a, float_status *status)
6335{
6336      float64 f_val;
6337
6338      f_val = float32_to_float64((float64)a, status);
6339
6340      return a < 0 ? (f_val | (1ULL << 63)) : f_val;
6341}
6342
6343static inline float32 float32_from_q16(int16_t a, float_status *status)
6344{
6345    float32 f_val;
6346
6347    /* conversion as integer and scaling */
6348    f_val = int32_to_float32(a, status);
6349    f_val = float32_scalbn(f_val, -15, status);
6350
6351    return f_val;
6352}
6353
6354static inline float64 float64_from_q32(int32_t a, float_status *status)
6355{
6356    float64 f_val;
6357
6358    /* conversion as integer and scaling */
6359    f_val = int32_to_float64(a, status);
6360    f_val = float64_scalbn(f_val, -31, status);
6361
6362    return f_val;
6363}
6364
/*
 * Convert float32 a to a 16-bit fixed-point value: scale by
 * float32_scalbn() with exponent 15, convert to integer, and saturate to
 * [-0x8000, 0x7fff].
 *
 * Exception flags left in *status:
 *  - any NaN input raises invalid and yields 0;
 *  - the underflow flag is cleared after the scaling and after the
 *    conversion step;
 *  - overflow while scaling additionally raises inexact and saturates
 *    according to the sign bit of the scaled value;
 *  - an invalid integer conversion is re-reported as overflow | inexact,
 *    as is any result outside the 16-bit range.
 */
static inline int16_t float32_to_q16(float32 a, float_status *status)
{
    int32_t q_val;
    int32_t q_min = 0xffff8000;
    int32_t q_max = 0x00007fff;

    int ieee_ex;

    if (float32_is_any_nan(a)) {
        float_raise(float_flag_invalid, status);
        return 0;
    }

    /* scaling */
    a = float32_scalbn(a, 15, status);

    ieee_ex = get_float_exception_flags(status);
    set_float_exception_flags(ieee_ex & (~float_flag_underflow)
                             , status);

    if (ieee_ex & float_flag_overflow) {
        float_raise(float_flag_inexact, status);
        /* the sign is taken from the raw bit pattern of the scaled value */
        return (int32_t)a < 0 ? q_min : q_max;
    }

    /* conversion to int */
    q_val = float32_to_int32(a, status);

    ieee_ex = get_float_exception_flags(status);
    set_float_exception_flags(ieee_ex & (~float_flag_underflow)
                             , status);

    if (ieee_ex & float_flag_invalid) {
        /* replace invalid by overflow | inexact and saturate by sign */
        set_float_exception_flags(ieee_ex & (~float_flag_invalid)
                               , status);
        float_raise(float_flag_overflow | float_flag_inexact, status);
        return (int32_t)a < 0 ? q_min : q_max;
    }

    if (q_val < q_min) {
        float_raise(float_flag_overflow | float_flag_inexact, status);
        return (int16_t)q_min;
    }

    if (q_max < q_val) {
        float_raise(float_flag_overflow | float_flag_inexact, status);
        return (int16_t)q_max;
    }

    return (int16_t)q_val;
}
6416
/*
 * Convert float64 a to a 32-bit fixed-point value: scale by
 * float64_scalbn() with exponent 31, convert to integer, and saturate to
 * [-0x80000000, 0x7fffffff].
 *
 * Exception flags left in *status:
 *  - any NaN input raises invalid and yields 0;
 *  - the underflow flag is cleared after the scaling and after the
 *    conversion step;
 *  - overflow while scaling additionally raises inexact and saturates
 *    according to the sign bit of the scaled value;
 *  - an invalid integer conversion is re-reported as overflow | inexact,
 *    as is any result outside the 32-bit range.
 */
static inline int32_t float64_to_q32(float64 a, float_status *status)
{
    int64_t q_val;
    int64_t q_min = 0xffffffff80000000LL;
    int64_t q_max = 0x000000007fffffffLL;

    int ieee_ex;

    if (float64_is_any_nan(a)) {
        float_raise(float_flag_invalid, status);
        return 0;
    }

    /* scaling */
    a = float64_scalbn(a, 31, status);

    ieee_ex = get_float_exception_flags(status);
    set_float_exception_flags(ieee_ex & (~float_flag_underflow)
           , status);

    if (ieee_ex & float_flag_overflow) {
        float_raise(float_flag_inexact, status);
        /* the sign is taken from the raw bit pattern of the scaled value */
        return (int64_t)a < 0 ? q_min : q_max;
    }

    /* conversion to integer */
    q_val = float64_to_int64(a, status);

    ieee_ex = get_float_exception_flags(status);
    set_float_exception_flags(ieee_ex & (~float_flag_underflow)
           , status);

    if (ieee_ex & float_flag_invalid) {
        /* replace invalid by overflow | inexact and saturate by sign */
        set_float_exception_flags(ieee_ex & (~float_flag_invalid)
               , status);
        float_raise(float_flag_overflow | float_flag_inexact, status);
        return (int64_t)a < 0 ? q_min : q_max;
    }

    if (q_val < q_min) {
        float_raise(float_flag_overflow | float_flag_inexact, status);
        return (int32_t)q_min;
    }

    if (q_max < q_val) {
        float_raise(float_flag_overflow | float_flag_inexact, status);
        return (int32_t)q_max;
    }

    return (int32_t)q_val;
}
6468
/*
 * Evaluate one floating-point comparison and store a mask in DEST.
 *
 * Clears the softfloat exception flags, calls float<BITS>_<OP>() (or the
 * _quiet variant when QUIET is non-zero) on ARG1 and ARG2, and sets DEST
 * to all ones in the low BITS bits when the comparison holds, 0 otherwise.
 * The exceptions are then folded into msacsr by update_msacsr() with
 * CLEAR_IS_INEXACT; if any of them is enabled, DEST is replaced by the
 * FLOAT_SNAN<BITS> pattern with its low 6 bits replaced by the exception
 * bits.
 *
 * Requires a variable named env (CPUMIPSState *) at the expansion site.
 */
#define MSA_FLOAT_COND(DEST, OP, ARG1, ARG2, BITS, QUIET)                   \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int c;                                                              \
        int64_t cond;                                                       \
        set_float_exception_flags(0, status);                               \
        if (!QUIET) {                                                       \
            cond = float ## BITS ## _ ## OP(ARG1, ARG2, status);            \
        } else {                                                            \
            cond = float ## BITS ## _ ## OP ## _quiet(ARG1, ARG2, status);  \
        }                                                                   \
        DEST = cond ? M_MAX_UINT(BITS) : 0;                                 \
        c = update_msacsr(env, CLEAR_IS_INEXACT, 0);                        \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
6487
/*
 * Runs the "eq" comparison (so its exceptions are still recorded) and
 * then turns an all-ones result into 0.  DEST therefore ends up 0 unless
 * MSA_FLOAT_COND replaced it with the exception pattern.
 */
#define MSA_FLOAT_AF(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);          \
        if ((DEST & M_MAX_UINT(BITS)) == M_MAX_UINT(BITS)) {        \
            DEST = 0;                                               \
        }                                                           \
    } while (0)
6495
/*
 * Unordered-or-equal: evaluates "unordered" first and, only if that left
 * DEST at 0, evaluates "eq".
 */
#define MSA_FLOAT_UEQ(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)
6503
/*
 * Not-equal, built from two "lt" tests: ARG1 < ARG2 first and, only if
 * that left DEST at 0, ARG2 < ARG1.
 */
#define MSA_FLOAT_NE(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);          \
        if (DEST == 0) {                                            \
            MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);      \
        }                                                           \
    } while (0)
6511
/*
 * Unordered-or-not-equal compare.  Three tests are tried in order
 * (unordered, ARG1 < ARG2, ARG2 < ARG1); each later test only runs while
 * DEST is still zero after the previous ones.
 */
#define MSA_FLOAT_UNE(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (!(DEST)) {                                              \
            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
        if (!(DEST)) {                                              \
            MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);      \
        }                                                           \
    } while (0)
6522
/*
 * Unordered-or-less-or-equal compare: "unordered" first, then "le" while
 * DEST is still zero.
 */
#define MSA_FLOAT_ULE(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (!(DEST)) {                                              \
            MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)
6530
/*
 * Unordered-or-less-than compare: "unordered" first, then "lt" while DEST
 * is still zero.
 */
#define MSA_FLOAT_ULT(DEST, ARG1, ARG2, BITS, QUIET)                \
    do {                                                            \
        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
        if (!(DEST)) {                                              \
            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
        }                                                           \
    } while (0)
6538
/*
 * Ordered compare built from two "le" tests: ARG1 <= ARG2 first, and,
 * only while DEST is still zero, ARG2 <= ARG1.
 */
#define MSA_FLOAT_OR(DEST, ARG1, ARG2, BITS, QUIET)                 \
    do {                                                            \
        MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);          \
        if (!(DEST)) {                                              \
            MSA_FLOAT_COND(DEST, le, ARG2, ARG1, BITS, QUIET);      \
        }                                                           \
    } while (0)
6546
6547static inline void compare_af(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6548                              wr_t *pwt, uint32_t df, int quiet,
6549                              uintptr_t retaddr)
6550{
6551    wr_t wx, *pwx = &wx;
6552    uint32_t i;
6553
6554    clear_msacsr_cause(env);
6555
6556    switch (df) {
6557    case DF_WORD:
6558        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6559            MSA_FLOAT_AF(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6560        }
6561        break;
6562    case DF_DOUBLE:
6563        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6564            MSA_FLOAT_AF(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6565        }
6566        break;
6567    default:
6568        assert(0);
6569    }
6570
6571    check_msacsr_cause(env, retaddr);
6572
6573    msa_move_v(pwd, pwx);
6574}
6575
6576static inline void compare_un(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6577                              wr_t *pwt, uint32_t df, int quiet,
6578                              uintptr_t retaddr)
6579{
6580    wr_t wx, *pwx = &wx;
6581    uint32_t i;
6582
6583    clear_msacsr_cause(env);
6584
6585    switch (df) {
6586    case DF_WORD:
6587        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6588            MSA_FLOAT_COND(pwx->w[i], unordered, pws->w[i], pwt->w[i], 32,
6589                    quiet);
6590        }
6591        break;
6592    case DF_DOUBLE:
6593        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6594            MSA_FLOAT_COND(pwx->d[i], unordered, pws->d[i], pwt->d[i], 64,
6595                    quiet);
6596        }
6597        break;
6598    default:
6599        assert(0);
6600    }
6601
6602    check_msacsr_cause(env, retaddr);
6603
6604    msa_move_v(pwd, pwx);
6605}
6606
6607static inline void compare_eq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6608                              wr_t *pwt, uint32_t df, int quiet,
6609                              uintptr_t retaddr)
6610{
6611    wr_t wx, *pwx = &wx;
6612    uint32_t i;
6613
6614    clear_msacsr_cause(env);
6615
6616    switch (df) {
6617    case DF_WORD:
6618        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6619            MSA_FLOAT_COND(pwx->w[i], eq, pws->w[i], pwt->w[i], 32, quiet);
6620        }
6621        break;
6622    case DF_DOUBLE:
6623        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6624            MSA_FLOAT_COND(pwx->d[i], eq, pws->d[i], pwt->d[i], 64, quiet);
6625        }
6626        break;
6627    default:
6628        assert(0);
6629    }
6630
6631    check_msacsr_cause(env, retaddr);
6632
6633    msa_move_v(pwd, pwx);
6634}
6635
6636static inline void compare_ueq(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6637                               wr_t *pwt, uint32_t df, int quiet,
6638                               uintptr_t retaddr)
6639{
6640    wr_t wx, *pwx = &wx;
6641    uint32_t i;
6642
6643    clear_msacsr_cause(env);
6644
6645    switch (df) {
6646    case DF_WORD:
6647        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6648            MSA_FLOAT_UEQ(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6649        }
6650        break;
6651    case DF_DOUBLE:
6652        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6653            MSA_FLOAT_UEQ(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6654        }
6655        break;
6656    default:
6657        assert(0);
6658    }
6659
6660    check_msacsr_cause(env, retaddr);
6661
6662    msa_move_v(pwd, pwx);
6663}
6664
6665static inline void compare_lt(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6666                              wr_t *pwt, uint32_t df, int quiet,
6667                              uintptr_t retaddr)
6668{
6669    wr_t wx, *pwx = &wx;
6670    uint32_t i;
6671
6672    clear_msacsr_cause(env);
6673
6674    switch (df) {
6675    case DF_WORD:
6676        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6677            MSA_FLOAT_COND(pwx->w[i], lt, pws->w[i], pwt->w[i], 32, quiet);
6678        }
6679        break;
6680    case DF_DOUBLE:
6681        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6682            MSA_FLOAT_COND(pwx->d[i], lt, pws->d[i], pwt->d[i], 64, quiet);
6683        }
6684        break;
6685    default:
6686        assert(0);
6687    }
6688
6689    check_msacsr_cause(env, retaddr);
6690
6691    msa_move_v(pwd, pwx);
6692}
6693
6694static inline void compare_ult(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6695                               wr_t *pwt, uint32_t df, int quiet,
6696                               uintptr_t retaddr)
6697{
6698    wr_t wx, *pwx = &wx;
6699    uint32_t i;
6700
6701    clear_msacsr_cause(env);
6702
6703    switch (df) {
6704    case DF_WORD:
6705        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6706            MSA_FLOAT_ULT(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6707        }
6708        break;
6709    case DF_DOUBLE:
6710        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6711            MSA_FLOAT_ULT(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6712        }
6713        break;
6714    default:
6715        assert(0);
6716    }
6717
6718    check_msacsr_cause(env, retaddr);
6719
6720    msa_move_v(pwd, pwx);
6721}
6722
6723static inline void compare_le(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6724                              wr_t *pwt, uint32_t df, int quiet,
6725                              uintptr_t retaddr)
6726{
6727    wr_t wx, *pwx = &wx;
6728    uint32_t i;
6729
6730    clear_msacsr_cause(env);
6731
6732    switch (df) {
6733    case DF_WORD:
6734        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6735            MSA_FLOAT_COND(pwx->w[i], le, pws->w[i], pwt->w[i], 32, quiet);
6736        }
6737        break;
6738    case DF_DOUBLE:
6739        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6740            MSA_FLOAT_COND(pwx->d[i], le, pws->d[i], pwt->d[i], 64, quiet);
6741        }
6742        break;
6743    default:
6744        assert(0);
6745    }
6746
6747    check_msacsr_cause(env, retaddr);
6748
6749    msa_move_v(pwd, pwx);
6750}
6751
6752static inline void compare_ule(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6753                               wr_t *pwt, uint32_t df, int quiet,
6754                               uintptr_t retaddr)
6755{
6756    wr_t wx, *pwx = &wx;
6757    uint32_t i;
6758
6759    clear_msacsr_cause(env);
6760
6761    switch (df) {
6762    case DF_WORD:
6763        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6764            MSA_FLOAT_ULE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6765        }
6766        break;
6767    case DF_DOUBLE:
6768        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6769            MSA_FLOAT_ULE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6770        }
6771        break;
6772    default:
6773        assert(0);
6774    }
6775
6776    check_msacsr_cause(env, retaddr);
6777
6778    msa_move_v(pwd, pwx);
6779}
6780
6781static inline void compare_or(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6782                              wr_t *pwt, uint32_t df, int quiet,
6783                              uintptr_t retaddr)
6784{
6785    wr_t wx, *pwx = &wx;
6786    uint32_t i;
6787
6788    clear_msacsr_cause(env);
6789
6790    switch (df) {
6791    case DF_WORD:
6792        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6793            MSA_FLOAT_OR(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6794        }
6795        break;
6796    case DF_DOUBLE:
6797        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6798            MSA_FLOAT_OR(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6799        }
6800        break;
6801    default:
6802        assert(0);
6803    }
6804
6805    check_msacsr_cause(env, retaddr);
6806
6807    msa_move_v(pwd, pwx);
6808}
6809
6810static inline void compare_une(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6811                               wr_t *pwt, uint32_t df, int quiet,
6812                               uintptr_t retaddr)
6813{
6814    wr_t wx, *pwx = &wx;
6815    uint32_t i;
6816
6817    clear_msacsr_cause(env);
6818
6819    switch (df) {
6820    case DF_WORD:
6821        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6822            MSA_FLOAT_UNE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6823        }
6824        break;
6825    case DF_DOUBLE:
6826        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6827            MSA_FLOAT_UNE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6828        }
6829        break;
6830    default:
6831        assert(0);
6832    }
6833
6834    check_msacsr_cause(env, retaddr);
6835
6836    msa_move_v(pwd, pwx);
6837}
6838
6839static inline void compare_ne(CPUMIPSState *env, wr_t *pwd, wr_t *pws,
6840                              wr_t *pwt, uint32_t df, int quiet,
6841                              uintptr_t retaddr)
6842{
6843    wr_t wx, *pwx = &wx;
6844    uint32_t i;
6845
6846    clear_msacsr_cause(env);
6847
6848    switch (df) {
6849    case DF_WORD:
6850        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
6851            MSA_FLOAT_NE(pwx->w[i], pws->w[i], pwt->w[i], 32, quiet);
6852        }
6853        break;
6854    case DF_DOUBLE:
6855        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
6856            MSA_FLOAT_NE(pwx->d[i], pws->d[i], pwt->d[i], 64, quiet);
6857        }
6858        break;
6859    default:
6860        assert(0);
6861    }
6862
6863    check_msacsr_cause(env, retaddr);
6864
6865    msa_move_v(pwd, pwx);
6866}
6867
6868void helper_msa_fcaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6869                        uint32_t ws, uint32_t wt)
6870{
6871    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6872    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6873    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6874    compare_af(env, pwd, pws, pwt, df, 1, GETPC());
6875}
6876
6877void helper_msa_fcun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6878                        uint32_t ws, uint32_t wt)
6879{
6880    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6881    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6882    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6883    compare_un(env, pwd, pws, pwt, df, 1, GETPC());
6884}
6885
6886void helper_msa_fceq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6887                        uint32_t ws, uint32_t wt)
6888{
6889    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6890    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6891    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6892    compare_eq(env, pwd, pws, pwt, df, 1, GETPC());
6893}
6894
6895void helper_msa_fcueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6896                         uint32_t ws, uint32_t wt)
6897{
6898    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6899    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6900    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6901    compare_ueq(env, pwd, pws, pwt, df, 1, GETPC());
6902}
6903
6904void helper_msa_fclt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6905                        uint32_t ws, uint32_t wt)
6906{
6907    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6908    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6909    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6910    compare_lt(env, pwd, pws, pwt, df, 1, GETPC());
6911}
6912
6913void helper_msa_fcult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6914                         uint32_t ws, uint32_t wt)
6915{
6916    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6917    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6918    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6919    compare_ult(env, pwd, pws, pwt, df, 1, GETPC());
6920}
6921
6922void helper_msa_fcle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6923                        uint32_t ws, uint32_t wt)
6924{
6925    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6926    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6927    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6928    compare_le(env, pwd, pws, pwt, df, 1, GETPC());
6929}
6930
6931void helper_msa_fcule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6932                         uint32_t ws, uint32_t wt)
6933{
6934    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6935    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6936    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6937    compare_ule(env, pwd, pws, pwt, df, 1, GETPC());
6938}
6939
6940void helper_msa_fsaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6941                        uint32_t ws, uint32_t wt)
6942{
6943    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6944    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6945    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6946    compare_af(env, pwd, pws, pwt, df, 0, GETPC());
6947}
6948
6949void helper_msa_fsun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6950                        uint32_t ws, uint32_t wt)
6951{
6952    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6953    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6954    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6955    compare_un(env, pwd, pws, pwt, df, 0, GETPC());
6956}
6957
6958void helper_msa_fseq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6959                        uint32_t ws, uint32_t wt)
6960{
6961    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6962    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6963    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6964    compare_eq(env, pwd, pws, pwt, df, 0, GETPC());
6965}
6966
6967void helper_msa_fsueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6968                         uint32_t ws, uint32_t wt)
6969{
6970    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6971    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6972    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6973    compare_ueq(env, pwd, pws, pwt, df, 0, GETPC());
6974}
6975
6976void helper_msa_fslt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6977                        uint32_t ws, uint32_t wt)
6978{
6979    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6980    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6981    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6982    compare_lt(env, pwd, pws, pwt, df, 0, GETPC());
6983}
6984
6985void helper_msa_fsult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6986                         uint32_t ws, uint32_t wt)
6987{
6988    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6989    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6990    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
6991    compare_ult(env, pwd, pws, pwt, df, 0, GETPC());
6992}
6993
6994void helper_msa_fsle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
6995                        uint32_t ws, uint32_t wt)
6996{
6997    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
6998    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
6999    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7000    compare_le(env, pwd, pws, pwt, df, 0, GETPC());
7001}
7002
7003void helper_msa_fsule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7004                         uint32_t ws, uint32_t wt)
7005{
7006    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7007    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7008    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7009    compare_ule(env, pwd, pws, pwt, df, 0, GETPC());
7010}
7011
7012void helper_msa_fcor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7013                        uint32_t ws, uint32_t wt)
7014{
7015    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7016    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7017    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7018    compare_or(env, pwd, pws, pwt, df, 1, GETPC());
7019}
7020
7021void helper_msa_fcune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7022                         uint32_t ws, uint32_t wt)
7023{
7024    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7025    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7026    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7027    compare_une(env, pwd, pws, pwt, df, 1, GETPC());
7028}
7029
7030void helper_msa_fcne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7031                        uint32_t ws, uint32_t wt)
7032{
7033    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7034    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7035    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7036    compare_ne(env, pwd, pws, pwt, df, 1, GETPC());
7037}
7038
7039void helper_msa_fsor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7040                        uint32_t ws, uint32_t wt)
7041{
7042    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7043    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7044    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7045    compare_or(env, pwd, pws, pwt, df, 0, GETPC());
7046}
7047
7048void helper_msa_fsune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7049                         uint32_t ws, uint32_t wt)
7050{
7051    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7052    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7053    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7054    compare_une(env, pwd, pws, pwt, df, 0, GETPC());
7055}
7056
7057void helper_msa_fsne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7058                        uint32_t ws, uint32_t wt)
7059{
7060    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7061    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7062    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7063    compare_ne(env, pwd, pws, pwt, df, 0, GETPC());
7064}
7065
/*
 * Stubs so that IS_DENORMAL(x, 16) expands to valid code: with both
 * predicates fixed at 0 the 16-bit form always evaluates to false.
 */
#define float16_is_zero(ARG) 0
#define float16_is_zero_or_denormal(ARG) 0

/*
 * True when ARG, taken as a float<BITS> value, is reported as
 * "zero or denormal" but not as "zero".  ARG is expanded twice.
 */
#define IS_DENORMAL(ARG, BITS)                      \
    (float ## BITS ## _is_zero_or_denormal(ARG)     \
    && !float ## BITS ## _is_zero(ARG))
7072
/*
 * DEST = float<BITS>_<OP>(ARG1, ARG2) using the MSA float_status of the
 * `env` in scope at the expansion site.
 *
 * The softfloat exception flags are cleared before the operation;
 * afterwards update_msacsr() is called with a flag telling whether the
 * result is denormal.  If get_enabled_exceptions() reports anything for
 * the returned cause, DEST is replaced by FLOAT_SNAN<BITS> with its low
 * six bits substituted by that cause value.
 */
#define MSA_FLOAT_BINOP(DEST, OP, ARG1, ARG2, BITS)                         \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int msa_cause;                                                      \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## OP(ARG1, ARG2, status);                \
        msa_cause = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));         \
                                                                            \
        if (get_enabled_exceptions(env, msa_cause)) {                       \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | msa_cause;    \
        }                                                                   \
    } while (0)
7086
7087void helper_msa_fadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7088        uint32_t ws, uint32_t wt)
7089{
7090    wr_t wx, *pwx = &wx;
7091    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7092    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7093    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7094    uint32_t i;
7095
7096    clear_msacsr_cause(env);
7097
7098    switch (df) {
7099    case DF_WORD:
7100        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7101            MSA_FLOAT_BINOP(pwx->w[i], add, pws->w[i], pwt->w[i], 32);
7102        }
7103        break;
7104    case DF_DOUBLE:
7105        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7106            MSA_FLOAT_BINOP(pwx->d[i], add, pws->d[i], pwt->d[i], 64);
7107        }
7108        break;
7109    default:
7110        assert(0);
7111    }
7112
7113    check_msacsr_cause(env, GETPC());
7114    msa_move_v(pwd, pwx);
7115}
7116
7117void helper_msa_fsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7118        uint32_t ws, uint32_t wt)
7119{
7120    wr_t wx, *pwx = &wx;
7121    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7122    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7123    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7124    uint32_t i;
7125
7126    clear_msacsr_cause(env);
7127
7128    switch (df) {
7129    case DF_WORD:
7130        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7131            MSA_FLOAT_BINOP(pwx->w[i], sub, pws->w[i], pwt->w[i], 32);
7132        }
7133        break;
7134    case DF_DOUBLE:
7135        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7136            MSA_FLOAT_BINOP(pwx->d[i], sub, pws->d[i], pwt->d[i], 64);
7137        }
7138        break;
7139    default:
7140        assert(0);
7141    }
7142
7143    check_msacsr_cause(env, GETPC());
7144    msa_move_v(pwd, pwx);
7145}
7146
7147void helper_msa_fmul_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7148        uint32_t ws, uint32_t wt)
7149{
7150    wr_t wx, *pwx = &wx;
7151    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7152    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7153    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7154    uint32_t i;
7155
7156    clear_msacsr_cause(env);
7157
7158    switch (df) {
7159    case DF_WORD:
7160        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7161            MSA_FLOAT_BINOP(pwx->w[i], mul, pws->w[i], pwt->w[i], 32);
7162        }
7163        break;
7164    case DF_DOUBLE:
7165        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7166            MSA_FLOAT_BINOP(pwx->d[i], mul, pws->d[i], pwt->d[i], 64);
7167        }
7168        break;
7169    default:
7170        assert(0);
7171    }
7172
7173    check_msacsr_cause(env, GETPC());
7174
7175    msa_move_v(pwd, pwx);
7176}
7177
7178void helper_msa_fdiv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7179        uint32_t ws, uint32_t wt)
7180{
7181    wr_t wx, *pwx = &wx;
7182    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7183    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7184    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7185    uint32_t i;
7186
7187    clear_msacsr_cause(env);
7188
7189    switch (df) {
7190    case DF_WORD:
7191        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7192            MSA_FLOAT_BINOP(pwx->w[i], div, pws->w[i], pwt->w[i], 32);
7193        }
7194        break;
7195    case DF_DOUBLE:
7196        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7197            MSA_FLOAT_BINOP(pwx->d[i], div, pws->d[i], pwt->d[i], 64);
7198        }
7199        break;
7200    default:
7201        assert(0);
7202    }
7203
7204    check_msacsr_cause(env, GETPC());
7205
7206    msa_move_v(pwd, pwx);
7207}
7208
/*
 * DEST = float<BITS>_muladd(ARG2, ARG3, ARG1, NEGATE) using the MSA
 * float_status of the `env` in scope at the expansion site.  Note the
 * argument order: ARG1 is passed as the third (addend) operand, ARG2 and
 * ARG3 as the first two.
 *
 * Exception handling mirrors the other MSA float macros: flags cleared
 * first, update_msacsr() told whether the result is denormal, and DEST
 * replaced by FLOAT_SNAN<BITS> (low six bits = cause) when
 * get_enabled_exceptions() reports anything.
 */
#define MSA_FLOAT_MULADD(DEST, ARG1, ARG2, ARG3, NEGATE, BITS)              \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int msa_cause;                                                      \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _muladd(ARG2, ARG3, ARG1, NEGATE, status);  \
        msa_cause = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));         \
                                                                            \
        if (get_enabled_exceptions(env, msa_cause)) {                       \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | msa_cause;    \
        }                                                                   \
    } while (0)
7222
7223void helper_msa_fmadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7224        uint32_t ws, uint32_t wt)
7225{
7226    wr_t wx, *pwx = &wx;
7227    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7228    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7229    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7230    uint32_t i;
7231
7232    clear_msacsr_cause(env);
7233
7234    switch (df) {
7235    case DF_WORD:
7236        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7237            MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
7238                           pws->w[i], pwt->w[i], 0, 32);
7239        }
7240        break;
7241    case DF_DOUBLE:
7242        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7243            MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
7244                           pws->d[i], pwt->d[i], 0, 64);
7245        }
7246        break;
7247    default:
7248        assert(0);
7249    }
7250
7251    check_msacsr_cause(env, GETPC());
7252
7253    msa_move_v(pwd, pwx);
7254}
7255
7256void helper_msa_fmsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7257        uint32_t ws, uint32_t wt)
7258{
7259    wr_t wx, *pwx = &wx;
7260    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7261    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7262    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7263    uint32_t i;
7264
7265    clear_msacsr_cause(env);
7266
7267    switch (df) {
7268    case DF_WORD:
7269        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7270            MSA_FLOAT_MULADD(pwx->w[i], pwd->w[i],
7271                           pws->w[i], pwt->w[i],
7272                           float_muladd_negate_product, 32);
7273      }
7274      break;
7275    case DF_DOUBLE:
7276        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7277            MSA_FLOAT_MULADD(pwx->d[i], pwd->d[i],
7278                           pws->d[i], pwt->d[i],
7279                           float_muladd_negate_product, 64);
7280        }
7281        break;
7282    default:
7283        assert(0);
7284    }
7285
7286    check_msacsr_cause(env, GETPC());
7287
7288    msa_move_v(pwd, pwx);
7289}
7290
7291void helper_msa_fexp2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7292        uint32_t ws, uint32_t wt)
7293{
7294    wr_t wx, *pwx = &wx;
7295    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7296    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7297    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7298    uint32_t i;
7299
7300    clear_msacsr_cause(env);
7301
7302    switch (df) {
7303    case DF_WORD:
7304        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7305            MSA_FLOAT_BINOP(pwx->w[i], scalbn, pws->w[i],
7306                            pwt->w[i] >  0x200 ?  0x200 :
7307                            pwt->w[i] < -0x200 ? -0x200 : pwt->w[i],
7308                            32);
7309        }
7310        break;
7311    case DF_DOUBLE:
7312        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7313            MSA_FLOAT_BINOP(pwx->d[i], scalbn, pws->d[i],
7314                            pwt->d[i] >  0x1000 ?  0x1000 :
7315                            pwt->d[i] < -0x1000 ? -0x1000 : pwt->d[i],
7316                            64);
7317        }
7318        break;
7319    default:
7320        assert(0);
7321    }
7322
7323    check_msacsr_cause(env, GETPC());
7324
7325    msa_move_v(pwd, pwx);
7326}
7327
/*
 * DEST = float<BITS>_<OP>(ARG) using the MSA float_status of the `env`
 * in scope at the expansion site.
 *
 * The softfloat exception flags are cleared before the operation;
 * afterwards update_msacsr() is called with a flag telling whether the
 * result is denormal.  If get_enabled_exceptions() reports anything for
 * the returned cause, DEST is replaced by FLOAT_SNAN<BITS> with its low
 * six bits substituted by that cause value.
 */
#define MSA_FLOAT_UNOP(DEST, OP, ARG, BITS)                                 \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int msa_cause;                                                      \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## OP(ARG, status);                       \
        msa_cause = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));         \
                                                                            \
        if (get_enabled_exceptions(env, msa_cause)) {                       \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | msa_cause;    \
        }                                                                   \
    } while (0)
7341
7342void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7343                         uint32_t ws, uint32_t wt)
7344{
7345    wr_t wx, *pwx = &wx;
7346    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7347    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7348    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7349    uint32_t i;
7350
7351    clear_msacsr_cause(env);
7352
7353    switch (df) {
7354    case DF_WORD:
7355        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7356            /*
7357             * Half precision floats come in two formats: standard
7358             * IEEE and "ARM" format.  The latter gains extra exponent
7359             * range by omitting the NaN/Inf encodings.
7360             */
7361            bool ieee = true;
7362
7363            MSA_FLOAT_BINOP(Lh(pwx, i), from_float32, pws->w[i], ieee, 16);
7364            MSA_FLOAT_BINOP(Rh(pwx, i), from_float32, pwt->w[i], ieee, 16);
7365        }
7366        break;
7367    case DF_DOUBLE:
7368        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7369            MSA_FLOAT_UNOP(Lw(pwx, i), from_float64, pws->d[i], 32);
7370            MSA_FLOAT_UNOP(Rw(pwx, i), from_float64, pwt->d[i], 32);
7371        }
7372        break;
7373    default:
7374        assert(0);
7375    }
7376
7377    check_msacsr_cause(env, GETPC());
7378    msa_move_v(pwd, pwx);
7379}
7380
/*
 * DEST = float<BITS>_<OP>(ARG) where the destination is XBITS wide, using
 * the MSA float_status of the `env` in scope at the expansion site.
 *
 * Unlike MSA_FLOAT_UNOP, update_msacsr() is called with the
 * CLEAR_FS_UNDERFLOW action and no denormal indication, and the
 * replacement value on an enabled exception is FLOAT_SNAN<XBITS> (low six
 * bits = cause) so that it matches the destination width.
 */
#define MSA_FLOAT_UNOP_XD(DEST, OP, ARG, BITS, XBITS)                       \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        int msa_cause;                                                      \
                                                                            \
        set_float_exception_flags(0, status);                               \
        DEST = float ## BITS ## _ ## OP(ARG, status);                       \
        msa_cause = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);              \
                                                                            \
        if (get_enabled_exceptions(env, msa_cause)) {                       \
            DEST = ((FLOAT_SNAN ## XBITS(status) >> 6) << 6) | msa_cause;   \
        }                                                                   \
    } while (0)
7394
7395void helper_msa_ftq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7396                       uint32_t ws, uint32_t wt)
7397{
7398    wr_t wx, *pwx = &wx;
7399    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7400    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7401    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7402    uint32_t i;
7403
7404    clear_msacsr_cause(env);
7405
7406    switch (df) {
7407    case DF_WORD:
7408        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7409            MSA_FLOAT_UNOP_XD(Lh(pwx, i), to_q16, pws->w[i], 32, 16);
7410            MSA_FLOAT_UNOP_XD(Rh(pwx, i), to_q16, pwt->w[i], 32, 16);
7411        }
7412        break;
7413    case DF_DOUBLE:
7414        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7415            MSA_FLOAT_UNOP_XD(Lw(pwx, i), to_q32, pws->d[i], 64, 32);
7416            MSA_FLOAT_UNOP_XD(Rw(pwx, i), to_q32, pwt->d[i], 64, 32);
7417        }
7418        break;
7419    default:
7420        assert(0);
7421    }
7422
7423    check_msacsr_cause(env, GETPC());
7424
7425    msa_move_v(pwd, pwx);
7426}
7427
/*
 * Evaluates to true when ARG1 is not a NaN of any kind while ARG2 is a
 * quiet NaN (as judged by float<BITS>_is_quiet_nan with STATUS).
 *
 * The expansion is wrapped in parentheses so the macro behaves as a single
 * expression when negated or combined with other operators.
 */
#define NUMBER_QNAN_PAIR(ARG1, ARG2, BITS, STATUS)      \
    (!float ## BITS ## _is_any_nan(ARG1)                \
     && float ## BITS ## _is_quiet_nan(ARG2, STATUS))
7431
/*
 * Two-operand float<BITS>_<OP> (used for min/max style operations) with
 * MSACSR bookkeeping.
 *
 * Clears the softfloat exception flags, stores the operation result in
 * DEST and calls update_msacsr() with action 0.  If the returned cause
 * bits name an enabled exception, DEST is replaced by the
 * FLOAT_SNAN<BITS> pattern with its low six bits set to those cause bits.
 *
 * Requires a variable named env to be visible where the macro expands.
 */
#define MSA_FLOAT_MAXOP(DEST, OP, ARG1, ARG2, BITS)                         \
    do {                                                                    \
        float_status *fpst = &env->active_tc.msa_fp_status;                 \
        int cause;                                                          \
                                                                            \
        set_float_exception_flags(0, fpst);                                 \
        DEST = float ## BITS ## _ ## OP(ARG1, ARG2, fpst);                  \
        cause = update_msacsr(env, 0, 0);                                   \
                                                                            \
        if (get_enabled_exceptions(env, cause)) {                           \
            DEST = ((FLOAT_SNAN ## BITS(fpst) >> 6) << 6) | cause;          \
        }                                                                   \
    } while (0)
7445
/*
 * Magnitude-based min/max selection for one element.
 *
 * F and G are the two complementary operations (min/max or max/min).
 * If exactly one operand is a quiet NaN and the other is not a NaN, the
 * non-NaN operand is substituted for both.  Three MSA_FLOAT_MAXOP steps
 * are then run, in this order: F on the operands, G on the operands, and
 * F on their absolute values.  X receives the F result when the absolute
 * values are equal or when the F-of-magnitudes result equals the absolute
 * value of the F result; otherwise X receives the G result.
 */
#define FMAXMIN_A(F, G, X, SRC_S, SRC_T, BITS, STATUS)                  \
    do {                                                                \
        uint ## BITS ## _t val_s = SRC_S, val_t = SRC_T;                \
        uint ## BITS ## _t mag_s, mag_t, pick_f, pick_g, pick_mag;      \
                                                                        \
        if (NUMBER_QNAN_PAIR(val_s, val_t, BITS, STATUS)) {             \
            val_t = val_s;                                              \
        } else if (NUMBER_QNAN_PAIR(val_t, val_s, BITS, STATUS)) {      \
            val_s = val_t;                                              \
        }                                                               \
        mag_s = float ## BITS ## _abs(val_s);                           \
        mag_t = float ## BITS ## _abs(val_t);                           \
        MSA_FLOAT_MAXOP(pick_f, F, val_s, val_t, BITS);                 \
        MSA_FLOAT_MAXOP(pick_g, G, val_s, val_t, BITS);                 \
        MSA_FLOAT_MAXOP(pick_mag, F, mag_s, mag_t, BITS);               \
        if (mag_s == mag_t ||                                           \
            pick_mag == float ## BITS ## _abs(pick_f)) {                \
            X = pick_f;                                                 \
        } else {                                                        \
            X = pick_g;                                                 \
        }                                                               \
    } while (0)
7463
7464void helper_msa_fmin_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7465        uint32_t ws, uint32_t wt)
7466{
7467    float_status *status = &env->active_tc.msa_fp_status;
7468    wr_t wx, *pwx = &wx;
7469    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7470    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7471    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7472
7473    clear_msacsr_cause(env);
7474
7475    if (df == DF_WORD) {
7476
7477        if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
7478            MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pws->w[0], 32);
7479        } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
7480            MSA_FLOAT_MAXOP(pwx->w[0], min, pwt->w[0], pwt->w[0], 32);
7481        } else {
7482            MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pwt->w[0], 32);
7483        }
7484
7485        if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
7486            MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pws->w[1], 32);
7487        } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
7488            MSA_FLOAT_MAXOP(pwx->w[1], min, pwt->w[1], pwt->w[1], 32);
7489        } else {
7490            MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pwt->w[1], 32);
7491        }
7492
7493        if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
7494            MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pws->w[2], 32);
7495        } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
7496            MSA_FLOAT_MAXOP(pwx->w[2], min, pwt->w[2], pwt->w[2], 32);
7497        } else {
7498            MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pwt->w[2], 32);
7499        }
7500
7501        if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
7502            MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pws->w[3], 32);
7503        } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
7504            MSA_FLOAT_MAXOP(pwx->w[3], min, pwt->w[3], pwt->w[3], 32);
7505        } else {
7506            MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pwt->w[3], 32);
7507        }
7508
7509    } else if (df == DF_DOUBLE) {
7510
7511        if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
7512            MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pws->d[0], 64);
7513        } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
7514            MSA_FLOAT_MAXOP(pwx->d[0], min, pwt->d[0], pwt->d[0], 64);
7515        } else {
7516            MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pwt->d[0], 64);
7517        }
7518
7519        if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
7520            MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pws->d[1], 64);
7521        } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
7522            MSA_FLOAT_MAXOP(pwx->d[1], min, pwt->d[1], pwt->d[1], 64);
7523        } else {
7524            MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pwt->d[1], 64);
7525        }
7526
7527    } else {
7528
7529        assert(0);
7530
7531    }
7532
7533    check_msacsr_cause(env, GETPC());
7534
7535    msa_move_v(pwd, pwx);
7536}
7537
7538void helper_msa_fmin_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7539        uint32_t ws, uint32_t wt)
7540{
7541    float_status *status = &env->active_tc.msa_fp_status;
7542    wr_t wx, *pwx = &wx;
7543    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7544    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7545    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7546
7547    clear_msacsr_cause(env);
7548
7549    if (df == DF_WORD) {
7550        FMAXMIN_A(min, max, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
7551        FMAXMIN_A(min, max, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
7552        FMAXMIN_A(min, max, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
7553        FMAXMIN_A(min, max, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
7554    } else if (df == DF_DOUBLE) {
7555        FMAXMIN_A(min, max, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
7556        FMAXMIN_A(min, max, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
7557    } else {
7558        assert(0);
7559    }
7560
7561    check_msacsr_cause(env, GETPC());
7562
7563    msa_move_v(pwd, pwx);
7564}
7565
7566void helper_msa_fmax_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7567        uint32_t ws, uint32_t wt)
7568{
7569     float_status *status = &env->active_tc.msa_fp_status;
7570    wr_t wx, *pwx = &wx;
7571    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7572    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7573    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7574
7575    clear_msacsr_cause(env);
7576
7577    if (df == DF_WORD) {
7578
7579        if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
7580            MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pws->w[0], 32);
7581        } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
7582            MSA_FLOAT_MAXOP(pwx->w[0], max, pwt->w[0], pwt->w[0], 32);
7583        } else {
7584            MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pwt->w[0], 32);
7585        }
7586
7587        if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
7588            MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pws->w[1], 32);
7589        } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
7590            MSA_FLOAT_MAXOP(pwx->w[1], max, pwt->w[1], pwt->w[1], 32);
7591        } else {
7592            MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pwt->w[1], 32);
7593        }
7594
7595        if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
7596            MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pws->w[2], 32);
7597        } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
7598            MSA_FLOAT_MAXOP(pwx->w[2], max, pwt->w[2], pwt->w[2], 32);
7599        } else {
7600            MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pwt->w[2], 32);
7601        }
7602
7603        if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
7604            MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pws->w[3], 32);
7605        } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
7606            MSA_FLOAT_MAXOP(pwx->w[3], max, pwt->w[3], pwt->w[3], 32);
7607        } else {
7608            MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pwt->w[3], 32);
7609        }
7610
7611    } else if (df == DF_DOUBLE) {
7612
7613        if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
7614            MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pws->d[0], 64);
7615        } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
7616            MSA_FLOAT_MAXOP(pwx->d[0], max, pwt->d[0], pwt->d[0], 64);
7617        } else {
7618            MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pwt->d[0], 64);
7619        }
7620
7621        if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
7622            MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pws->d[1], 64);
7623        } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
7624            MSA_FLOAT_MAXOP(pwx->d[1], max, pwt->d[1], pwt->d[1], 64);
7625        } else {
7626            MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pwt->d[1], 64);
7627        }
7628
7629    } else {
7630
7631        assert(0);
7632
7633    }
7634
7635    check_msacsr_cause(env, GETPC());
7636
7637    msa_move_v(pwd, pwx);
7638}
7639
7640void helper_msa_fmax_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7641        uint32_t ws, uint32_t wt)
7642{
7643    float_status *status = &env->active_tc.msa_fp_status;
7644    wr_t wx, *pwx = &wx;
7645    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7646    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7647    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
7648
7649    clear_msacsr_cause(env);
7650
7651    if (df == DF_WORD) {
7652        FMAXMIN_A(max, min, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
7653        FMAXMIN_A(max, min, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
7654        FMAXMIN_A(max, min, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
7655        FMAXMIN_A(max, min, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
7656    } else if (df == DF_DOUBLE) {
7657        FMAXMIN_A(max, min, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
7658        FMAXMIN_A(max, min, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
7659    } else {
7660        assert(0);
7661    }
7662
7663    check_msacsr_cause(env, GETPC());
7664
7665    msa_move_v(pwd, pwx);
7666}
7667
7668void helper_msa_fclass_df(CPUMIPSState *env, uint32_t df,
7669        uint32_t wd, uint32_t ws)
7670{
7671    float_status *status = &env->active_tc.msa_fp_status;
7672
7673    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7674    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7675    if (df == DF_WORD) {
7676        pwd->w[0] = float_class_s(pws->w[0], status);
7677        pwd->w[1] = float_class_s(pws->w[1], status);
7678        pwd->w[2] = float_class_s(pws->w[2], status);
7679        pwd->w[3] = float_class_s(pws->w[3], status);
7680    } else if (df == DF_DOUBLE) {
7681        pwd->d[0] = float_class_d(pws->d[0], status);
7682        pwd->d[1] = float_class_d(pws->d[1], status);
7683    } else {
7684        assert(0);
7685    }
7686}
7687
/*
 * Unary float<BITS>_<OP> with MSACSR bookkeeping and a zero result for
 * NaN inputs.
 *
 * Clears the softfloat exception flags, stores the operation result in
 * DEST and calls update_msacsr() with the CLEAR_FS_UNDERFLOW action.
 * If the returned cause bits name an enabled exception, DEST becomes the
 * FLOAT_SNAN<BITS> pattern with its low six bits set to those cause bits.
 * Otherwise, when ARG is any kind of NaN, DEST is forced to 0.
 *
 * ARG is expanded more than once, so it must be free of side effects.
 * Requires a variable named env to be visible where the macro expands.
 */
#define MSA_FLOAT_UNOP0(DEST, OP, ARG, BITS)                                \
    do {                                                                    \
        float_status *fpst = &env->active_tc.msa_fp_status;                 \
        int cause;                                                          \
                                                                            \
        set_float_exception_flags(0, fpst);                                 \
        DEST = float ## BITS ## _ ## OP(ARG, fpst);                         \
        cause = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                  \
                                                                            \
        if (get_enabled_exceptions(env, cause)) {                           \
            DEST = ((FLOAT_SNAN ## BITS(fpst) >> 6) << 6) | cause;          \
        } else if (float ## BITS ## _is_any_nan(ARG)) {                     \
            DEST = 0;                                                       \
        }                                                                   \
    } while (0)
7703
7704void helper_msa_ftrunc_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7705                            uint32_t ws)
7706{
7707    wr_t wx, *pwx = &wx;
7708    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7709    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7710    uint32_t i;
7711
7712    clear_msacsr_cause(env);
7713
7714    switch (df) {
7715    case DF_WORD:
7716        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7717            MSA_FLOAT_UNOP0(pwx->w[i], to_int32_round_to_zero, pws->w[i], 32);
7718        }
7719        break;
7720    case DF_DOUBLE:
7721        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7722            MSA_FLOAT_UNOP0(pwx->d[i], to_int64_round_to_zero, pws->d[i], 64);
7723        }
7724        break;
7725    default:
7726        assert(0);
7727    }
7728
7729    check_msacsr_cause(env, GETPC());
7730
7731    msa_move_v(pwd, pwx);
7732}
7733
7734void helper_msa_ftrunc_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7735                            uint32_t ws)
7736{
7737    wr_t wx, *pwx = &wx;
7738    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7739    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7740    uint32_t i;
7741
7742    clear_msacsr_cause(env);
7743
7744    switch (df) {
7745    case DF_WORD:
7746        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7747            MSA_FLOAT_UNOP0(pwx->w[i], to_uint32_round_to_zero, pws->w[i], 32);
7748        }
7749        break;
7750    case DF_DOUBLE:
7751        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7752            MSA_FLOAT_UNOP0(pwx->d[i], to_uint64_round_to_zero, pws->d[i], 64);
7753        }
7754        break;
7755    default:
7756        assert(0);
7757    }
7758
7759    check_msacsr_cause(env, GETPC());
7760
7761    msa_move_v(pwd, pwx);
7762}
7763
7764void helper_msa_fsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7765                         uint32_t ws)
7766{
7767    wr_t wx, *pwx = &wx;
7768    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7769    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7770    uint32_t i;
7771
7772    clear_msacsr_cause(env);
7773
7774    switch (df) {
7775    case DF_WORD:
7776        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7777            MSA_FLOAT_UNOP(pwx->w[i], sqrt, pws->w[i], 32);
7778        }
7779        break;
7780    case DF_DOUBLE:
7781        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7782            MSA_FLOAT_UNOP(pwx->d[i], sqrt, pws->d[i], 64);
7783        }
7784        break;
7785    default:
7786        assert(0);
7787    }
7788
7789    check_msacsr_cause(env, GETPC());
7790
7791    msa_move_v(pwd, pwx);
7792}
7793
/*
 * Reciprocal of ARG with MSACSR bookkeeping: DEST = 1 / ARG.
 *
 * The softfloat exception flags are cleared, ARG is evaluated exactly
 * once into a local, and FLOAT_ONE<BITS> is divided by it.
 * update_msacsr() is called with action 0 when the divisor is infinite or
 * the quotient is a quiet NaN, and with RECIPROCAL_INEXACT otherwise; its
 * last argument reports whether the quotient is denormal.  If the
 * returned cause bits name an enabled exception, DEST becomes the
 * FLOAT_SNAN<BITS> pattern with its low six bits set to those cause bits.
 *
 * ARG may be an expression with a cost or with softfloat side effects
 * (helper_msa_frsqrt_df passes a floatNN_sqrt() call), which is why it is
 * captured once instead of being expanded for both the divide and the
 * infinity test.  It is evaluated after the flags are cleared so that any
 * flags it raises are still seen by update_msacsr().
 *
 * Requires a variable named env to be visible where the macro expands.
 */
#define MSA_FLOAT_RECIPROCAL(DEST, ARG, BITS)                               \
    do {                                                                    \
        float_status *status = &env->active_tc.msa_fp_status;               \
        float ## BITS divisor;                                              \
        int c;                                                              \
                                                                            \
        set_float_exception_flags(0, status);                               \
        divisor = (ARG);                                                    \
        DEST = float ## BITS ## _ ## div(FLOAT_ONE ## BITS, divisor,        \
                                         status);                           \
        c = update_msacsr(env,                                              \
                          (float ## BITS ## _is_infinity(divisor) ||        \
                           float ## BITS ## _is_quiet_nan(DEST, status)) ?  \
                          0 : RECIPROCAL_INEXACT,                           \
                          IS_DENORMAL(DEST, BITS));                         \
                                                                            \
        if (get_enabled_exceptions(env, c)) {                               \
            DEST = ((FLOAT_SNAN ## BITS(status) >> 6) << 6) | c;            \
        }                                                                   \
    } while (0)
7810
7811void helper_msa_frsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7812                          uint32_t ws)
7813{
7814    wr_t wx, *pwx = &wx;
7815    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7816    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7817    uint32_t i;
7818
7819    clear_msacsr_cause(env);
7820
7821    switch (df) {
7822    case DF_WORD:
7823        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7824            MSA_FLOAT_RECIPROCAL(pwx->w[i], float32_sqrt(pws->w[i],
7825                    &env->active_tc.msa_fp_status), 32);
7826        }
7827        break;
7828    case DF_DOUBLE:
7829        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7830            MSA_FLOAT_RECIPROCAL(pwx->d[i], float64_sqrt(pws->d[i],
7831                    &env->active_tc.msa_fp_status), 64);
7832        }
7833        break;
7834    default:
7835        assert(0);
7836    }
7837
7838    check_msacsr_cause(env, GETPC());
7839
7840    msa_move_v(pwd, pwx);
7841}
7842
7843void helper_msa_frcp_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7844                        uint32_t ws)
7845{
7846    wr_t wx, *pwx = &wx;
7847    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7848    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7849    uint32_t i;
7850
7851    clear_msacsr_cause(env);
7852
7853    switch (df) {
7854    case DF_WORD:
7855        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7856            MSA_FLOAT_RECIPROCAL(pwx->w[i], pws->w[i], 32);
7857        }
7858        break;
7859    case DF_DOUBLE:
7860        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7861            MSA_FLOAT_RECIPROCAL(pwx->d[i], pws->d[i], 64);
7862        }
7863        break;
7864    default:
7865        assert(0);
7866    }
7867
7868    check_msacsr_cause(env, GETPC());
7869
7870    msa_move_v(pwd, pwx);
7871}
7872
7873void helper_msa_frint_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7874                         uint32_t ws)
7875{
7876    wr_t wx, *pwx = &wx;
7877    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7878    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7879    uint32_t i;
7880
7881    clear_msacsr_cause(env);
7882
7883    switch (df) {
7884    case DF_WORD:
7885        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7886            MSA_FLOAT_UNOP(pwx->w[i], round_to_int, pws->w[i], 32);
7887        }
7888        break;
7889    case DF_DOUBLE:
7890        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7891            MSA_FLOAT_UNOP(pwx->d[i], round_to_int, pws->d[i], 64);
7892        }
7893        break;
7894    default:
7895        assert(0);
7896    }
7897
7898    check_msacsr_cause(env, GETPC());
7899
7900    msa_move_v(pwd, pwx);
7901}
7902
/*
 * Base-2 logarithm rounded down to an integral float value, with MSACSR
 * bookkeeping.
 *
 * With the exception flags cleared and the rounding mode temporarily set
 * to float_round_down, DEST receives
 * float<BITS>_round_to_int(float<BITS>_log2(ARG)).  The rounding mode is
 * then restored from the RM field of MSACSR (via the ieee_rm table) and
 * the inexact flag is dropped before update_msacsr() is called; its last
 * argument reports whether DEST is denormal.  If the returned cause bits
 * name an enabled exception, DEST becomes the FLOAT_SNAN<BITS> pattern
 * with its low six bits set to those cause bits.
 *
 * Requires a variable named env to be visible where the macro expands.
 */
#define MSA_FLOAT_LOGB(DEST, ARG, BITS)                                     \
    do {                                                                    \
        float_status *fpst = &env->active_tc.msa_fp_status;                 \
        int cause;                                                          \
                                                                            \
        set_float_exception_flags(0, fpst);                                 \
        set_float_rounding_mode(float_round_down, fpst);                    \
        DEST = float ## BITS ## _round_to_int(                              \
                   float ## BITS ## _log2(ARG, fpst), fpst);                \
        set_float_rounding_mode(ieee_rm[(env->active_tc.msacsr &            \
                                         MSACSR_RM_MASK) >> MSACSR_RM],     \
                                fpst);                                      \
                                                                            \
        set_float_exception_flags(get_float_exception_flags(fpst) &         \
                                  (~float_flag_inexact),                    \
                                  fpst);                                    \
                                                                            \
        cause = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));             \
                                                                            \
        if (get_enabled_exceptions(env, cause)) {                           \
            DEST = ((FLOAT_SNAN ## BITS(fpst) >> 6) << 6) | cause;          \
        }                                                                   \
    } while (0)
7926
7927void helper_msa_flog2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7928                         uint32_t ws)
7929{
7930    wr_t wx, *pwx = &wx;
7931    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7932    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7933    uint32_t i;
7934
7935    clear_msacsr_cause(env);
7936
7937    switch (df) {
7938    case DF_WORD:
7939        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7940            MSA_FLOAT_LOGB(pwx->w[i], pws->w[i], 32);
7941        }
7942        break;
7943    case DF_DOUBLE:
7944        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7945            MSA_FLOAT_LOGB(pwx->d[i], pws->d[i], 64);
7946        }
7947        break;
7948    default:
7949        assert(0);
7950    }
7951
7952    check_msacsr_cause(env, GETPC());
7953
7954    msa_move_v(pwd, pwx);
7955}
7956
7957void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7958                          uint32_t ws)
7959{
7960    wr_t wx, *pwx = &wx;
7961    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7962    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7963    uint32_t i;
7964
7965    clear_msacsr_cause(env);
7966
7967    switch (df) {
7968    case DF_WORD:
7969        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
7970            /*
7971             * Half precision floats come in two formats: standard
7972             * IEEE and "ARM" format.  The latter gains extra exponent
7973             * range by omitting the NaN/Inf encodings.
7974             */
7975            bool ieee = true;
7976
7977            MSA_FLOAT_BINOP(pwx->w[i], from_float16, Lh(pws, i), ieee, 32);
7978        }
7979        break;
7980    case DF_DOUBLE:
7981        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
7982            MSA_FLOAT_UNOP(pwx->d[i], from_float32, Lw(pws, i), 64);
7983        }
7984        break;
7985    default:
7986        assert(0);
7987    }
7988
7989    check_msacsr_cause(env, GETPC());
7990    msa_move_v(pwd, pwx);
7991}
7992
7993void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
7994                          uint32_t ws)
7995{
7996    wr_t wx, *pwx = &wx;
7997    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
7998    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
7999    uint32_t i;
8000
8001    clear_msacsr_cause(env);
8002
8003    switch (df) {
8004    case DF_WORD:
8005        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8006            /*
8007             * Half precision floats come in two formats: standard
8008             * IEEE and "ARM" format.  The latter gains extra exponent
8009             * range by omitting the NaN/Inf encodings.
8010             */
8011            bool ieee = true;
8012
8013            MSA_FLOAT_BINOP(pwx->w[i], from_float16, Rh(pws, i), ieee, 32);
8014        }
8015        break;
8016    case DF_DOUBLE:
8017        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8018            MSA_FLOAT_UNOP(pwx->d[i], from_float32, Rw(pws, i), 64);
8019        }
8020        break;
8021    default:
8022        assert(0);
8023    }
8024
8025    check_msacsr_cause(env, GETPC());
8026    msa_move_v(pwd, pwx);
8027}
8028
8029void helper_msa_ffql_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8030                        uint32_t ws)
8031{
8032    wr_t wx, *pwx = &wx;
8033    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8034    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8035    uint32_t i;
8036
8037    switch (df) {
8038    case DF_WORD:
8039        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8040            MSA_FLOAT_UNOP(pwx->w[i], from_q16, Lh(pws, i), 32);
8041        }
8042        break;
8043    case DF_DOUBLE:
8044        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8045            MSA_FLOAT_UNOP(pwx->d[i], from_q32, Lw(pws, i), 64);
8046        }
8047        break;
8048    default:
8049        assert(0);
8050    }
8051
8052    msa_move_v(pwd, pwx);
8053}
8054
8055void helper_msa_ffqr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8056                        uint32_t ws)
8057{
8058    wr_t wx, *pwx = &wx;
8059    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8060    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8061    uint32_t i;
8062
8063    switch (df) {
8064    case DF_WORD:
8065        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8066            MSA_FLOAT_UNOP(pwx->w[i], from_q16, Rh(pws, i), 32);
8067        }
8068        break;
8069    case DF_DOUBLE:
8070        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8071            MSA_FLOAT_UNOP(pwx->d[i], from_q32, Rw(pws, i), 64);
8072        }
8073        break;
8074    default:
8075        assert(0);
8076    }
8077
8078    msa_move_v(pwd, pwx);
8079}
8080
8081void helper_msa_ftint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8082                           uint32_t ws)
8083{
8084    wr_t wx, *pwx = &wx;
8085    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8086    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8087    uint32_t i;
8088
8089    clear_msacsr_cause(env);
8090
8091    switch (df) {
8092    case DF_WORD:
8093        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8094            MSA_FLOAT_UNOP0(pwx->w[i], to_int32, pws->w[i], 32);
8095        }
8096        break;
8097    case DF_DOUBLE:
8098        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8099            MSA_FLOAT_UNOP0(pwx->d[i], to_int64, pws->d[i], 64);
8100        }
8101        break;
8102    default:
8103        assert(0);
8104    }
8105
8106    check_msacsr_cause(env, GETPC());
8107
8108    msa_move_v(pwd, pwx);
8109}
8110
8111void helper_msa_ftint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8112                           uint32_t ws)
8113{
8114    wr_t wx, *pwx = &wx;
8115    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8116    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8117    uint32_t i;
8118
8119    clear_msacsr_cause(env);
8120
8121    switch (df) {
8122    case DF_WORD:
8123        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8124            MSA_FLOAT_UNOP0(pwx->w[i], to_uint32, pws->w[i], 32);
8125        }
8126        break;
8127    case DF_DOUBLE:
8128        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8129            MSA_FLOAT_UNOP0(pwx->d[i], to_uint64, pws->d[i], 64);
8130        }
8131        break;
8132    default:
8133        assert(0);
8134    }
8135
8136    check_msacsr_cause(env, GETPC());
8137
8138    msa_move_v(pwd, pwx);
8139}
8140
/*
 * Aliases that give the integer-to-float conversions a
 * float<BITS>_from_<type> spelling, so they can be selected by the
 * float ## BITS ## _ ## OP token pasting used by the MSA_FLOAT_* macros.
 */

/* Signed integer sources */
#define float32_from_int32  int32_to_float32
#define float64_from_int64  int64_to_float64

/* Unsigned integer sources */
#define float32_from_uint32 uint32_to_float32
#define float64_from_uint64 uint64_to_float64
8146
8147void helper_msa_ffint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8148                           uint32_t ws)
8149{
8150    wr_t wx, *pwx = &wx;
8151    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8152    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8153    uint32_t i;
8154
8155    clear_msacsr_cause(env);
8156
8157    switch (df) {
8158    case DF_WORD:
8159        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8160            MSA_FLOAT_UNOP(pwx->w[i], from_int32, pws->w[i], 32);
8161        }
8162        break;
8163    case DF_DOUBLE:
8164        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8165            MSA_FLOAT_UNOP(pwx->d[i], from_int64, pws->d[i], 64);
8166        }
8167        break;
8168    default:
8169        assert(0);
8170    }
8171
8172    check_msacsr_cause(env, GETPC());
8173
8174    msa_move_v(pwd, pwx);
8175}
8176
8177void helper_msa_ffint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
8178                           uint32_t ws)
8179{
8180    wr_t wx, *pwx = &wx;
8181    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8182    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
8183    uint32_t i;
8184
8185    clear_msacsr_cause(env);
8186
8187    switch (df) {
8188    case DF_WORD:
8189        for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
8190            MSA_FLOAT_UNOP(pwx->w[i], from_uint32, pws->w[i], 32);
8191        }
8192        break;
8193    case DF_DOUBLE:
8194        for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
8195            MSA_FLOAT_UNOP(pwx->d[i], from_uint64, pws->d[i], 64);
8196        }
8197        break;
8198    default:
8199        assert(0);
8200    }
8201
8202    check_msacsr_cause(env, GETPC());
8203
8204    msa_move_v(pwd, pwx);
8205}
8206
/*
 * Width in bits of one element of data format df: 8 << df.
 *
 * NOTE: DF_BITS and DF_ELEMENTS are already defined, with the same
 * replacement text, near the top of this file.  C only accepts a repeated
 * #define when the replacement lists are identical, so the two copies must
 * be kept in sync.
 */
#define DF_BITS(df) (1 << ((df) + 3))

/* Number of df-sized elements in one vector: MSA_WRLEN / element width */
#define DF_ELEMENTS(df) (MSA_WRLEN / DF_BITS(df))
8212
/*
 * Declares a local MemOpIdx named oi for a target-endian, unaligned
 * access of size DF, using the MMU index returned by
 * cpu_mmu_index(env, false).  In user-only builds the macro expands to
 * nothing.
 *
 * The expansion already ends in a semicolon, so it is written as a bare
 * MEMOP_IDX(...) line among a function's declarations.  DF is
 * parenthesized so that an argument containing a low-precedence operator
 * cannot regroup the surrounding '|' expression.
 */
#if !defined(CONFIG_USER_ONLY)
#define MEMOP_IDX(DF)                                                   \
    MemOpIdx oi = make_memop_idx(MO_TE | (DF) | MO_UNALN,               \
                                 cpu_mmu_index(env, false));
#else
#define MEMOP_IDX(DF)
#endif
8220
8221#ifdef TARGET_WORDS_BIGENDIAN
/*
 * Swap the two bytes inside each of the four 16-bit lanes of x,
 * leaving the lanes themselves in place.
 */
static inline uint64_t bswap16x4(uint64_t x)
{
    const uint64_t low_bytes = 0x00ff00ff00ff00ffull;
    uint64_t moved_up = (x & low_bytes) << 8;
    uint64_t moved_down = (x >> 8) & low_bytes;

    return moved_up | moved_down;
}
8227
/*
 * Byte-swap each of the two 32-bit lanes of x independently: a full
 * 64-bit byte swap also exchanges the two lanes, so a rotate by 32 puts
 * them back in their original positions.
 */
static inline uint64_t bswap32x2(uint64_t x)
{
    uint64_t fully_swapped = bswap64(x);

    return ror64(fully_swapped, 32);
}
8232#endif
8233
8234void helper_msa_ld_b(CPUMIPSState *env, uint32_t wd,
8235                     target_ulong addr)
8236{
8237    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8238    uintptr_t ra = GETPC();
8239    uint64_t d0, d1;
8240
8241    /* Load 8 bytes at a time.  Vector element ordering makes this LE.  */
8242    d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8243    d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8244    pwd->d[0] = d0;
8245    pwd->d[1] = d1;
8246}
8247
8248void helper_msa_ld_h(CPUMIPSState *env, uint32_t wd,
8249                     target_ulong addr)
8250{
8251    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8252    uintptr_t ra = GETPC();
8253    uint64_t d0, d1;
8254
8255    /*
8256     * Load 8 bytes at a time.  Use little-endian load, then for
8257     * big-endian target, we must then swap the four halfwords.
8258     */
8259    d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8260    d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8261#ifdef TARGET_WORDS_BIGENDIAN
8262    d0 = bswap16x4(d0);
8263    d1 = bswap16x4(d1);
8264#endif
8265    pwd->d[0] = d0;
8266    pwd->d[1] = d1;
8267}
8268
8269void helper_msa_ld_w(CPUMIPSState *env, uint32_t wd,
8270                     target_ulong addr)
8271{
8272    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8273    uintptr_t ra = GETPC();
8274    uint64_t d0, d1;
8275
8276    /*
8277     * Load 8 bytes at a time.  Use little-endian load, then for
8278     * big-endian target, we must then bswap the two words.
8279     */
8280    d0 = cpu_ldq_le_data_ra(env, addr + 0, ra);
8281    d1 = cpu_ldq_le_data_ra(env, addr + 8, ra);
8282#ifdef TARGET_WORDS_BIGENDIAN
8283    d0 = bswap32x2(d0);
8284    d1 = bswap32x2(d1);
8285#endif
8286    pwd->d[0] = d0;
8287    pwd->d[1] = d1;
8288}
8289
8290void helper_msa_ld_d(CPUMIPSState *env, uint32_t wd,
8291                     target_ulong addr)
8292{
8293    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8294    uintptr_t ra = GETPC();
8295    uint64_t d0, d1;
8296
8297    d0 = cpu_ldq_data_ra(env, addr + 0, ra);
8298    d1 = cpu_ldq_data_ra(env, addr + 8, ra);
8299    pwd->d[0] = d0;
8300    pwd->d[1] = d1;
8301}
8302
/*
 * MSA_PAGESPAN(x) is non-zero when an access of MSA_WRLEN / 8 bytes
 * starting at address x would extend beyond the page that contains x:
 * the offset of x within its page, plus the index of the last byte of
 * the access, reaches or exceeds TARGET_PAGE_SIZE.
 *
 * The argument is expanded exactly once.
 */
#define MSA_PAGESPAN(x) \
        ((((x) & ~TARGET_PAGE_MASK) + MSA_WRLEN / 8 - 1) >= TARGET_PAGE_SIZE)
8305
8306static inline void ensure_writable_pages(CPUMIPSState *env,
8307                                         target_ulong addr,
8308                                         int mmu_idx,
8309                                         uintptr_t retaddr)
8310{
8311    /* FIXME: Probe the actual accesses (pass and use a size) */
8312    if (unlikely(MSA_PAGESPAN(addr))) {
8313        /* first page */
8314        probe_write(env, addr, 0, mmu_idx, retaddr);
8315        /* second page */
8316        addr = (addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
8317        probe_write(env, addr, 0, mmu_idx, retaddr);
8318    }
8319}
8320
8321void helper_msa_st_b(CPUMIPSState *env, uint32_t wd,
8322                     target_ulong addr)
8323{
8324    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8325    int mmu_idx = cpu_mmu_index(env, false);
8326    uintptr_t ra = GETPC();
8327
8328    ensure_writable_pages(env, addr, mmu_idx, ra);
8329
8330    /* Store 8 bytes at a time.  Vector element ordering makes this LE.  */
8331    cpu_stq_le_data_ra(env, addr + 0, pwd->d[0], ra);
8332    cpu_stq_le_data_ra(env, addr + 0, pwd->d[1], ra);
8333}
8334
8335void helper_msa_st_h(CPUMIPSState *env, uint32_t wd,
8336                     target_ulong addr)
8337{
8338    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8339    int mmu_idx = cpu_mmu_index(env, false);
8340    uintptr_t ra = GETPC();
8341    uint64_t d0, d1;
8342
8343    ensure_writable_pages(env, addr, mmu_idx, ra);
8344
8345    /* Store 8 bytes at a time.  See helper_msa_ld_h. */
8346    d0 = pwd->d[0];
8347    d1 = pwd->d[1];
8348#ifdef TARGET_WORDS_BIGENDIAN
8349    d0 = bswap16x4(d0);
8350    d1 = bswap16x4(d1);
8351#endif
8352    cpu_stq_le_data_ra(env, addr + 0, d0, ra);
8353    cpu_stq_le_data_ra(env, addr + 8, d1, ra);
8354}
8355
8356void helper_msa_st_w(CPUMIPSState *env, uint32_t wd,
8357                     target_ulong addr)
8358{
8359    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8360    int mmu_idx = cpu_mmu_index(env, false);
8361    uintptr_t ra = GETPC();
8362    uint64_t d0, d1;
8363
8364    ensure_writable_pages(env, addr, mmu_idx, ra);
8365
8366    /* Store 8 bytes at a time.  See helper_msa_ld_w. */
8367    d0 = pwd->d[0];
8368    d1 = pwd->d[1];
8369#ifdef TARGET_WORDS_BIGENDIAN
8370    d0 = bswap32x2(d0);
8371    d1 = bswap32x2(d1);
8372#endif
8373    cpu_stq_le_data_ra(env, addr + 0, d0, ra);
8374    cpu_stq_le_data_ra(env, addr + 8, d1, ra);
8375}
8376
8377void helper_msa_st_d(CPUMIPSState *env, uint32_t wd,
8378                     target_ulong addr)
8379{
8380    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
8381    int mmu_idx = cpu_mmu_index(env, false);
8382    uintptr_t ra = GETPC();
8383
8384    ensure_writable_pages(env, addr, mmu_idx, GETPC());
8385
8386    cpu_stq_data_ra(env, addr + 0, pwd->d[0], ra);
8387    cpu_stq_data_ra(env, addr + 8, pwd->d[1], ra);
8388}
8389