/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2018 Linaro, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
  19
#include "qemu/osdep.h"
#include "cpu.h"
#include "tcg.h"
#include "tcg-op.h"
#include "tcg-mo.h"
  25
/* Reduce the number of ifdefs below.  This assumes that all uses of
   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
   the compiler can eliminate.  */
#if TCG_TARGET_REG_BITS == 64
extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_LOW  TCGV_LOW_link_error
#define TCGV_HIGH TCGV_HIGH_link_error
#endif
  35
/*
 * Vector optional opcode tracking.
 * Except for the basic logical operations (and, or, xor), and
 * data movement (mov, ld, st, dupi), many vector opcodes are
 * optional and may not be supported on the host.  Thank Intel
 * for the irregularity in their instruction set.
 *
 * The gvec expanders allow custom vector operations to be composed,
 * generally via the .fniv callback in the GVecGen* structures.  At
 * the same time, in deciding whether to use this hook we need to
 * know if the host supports the required operations.  This is
 * presented as an array of opcodes, terminated by 0.  Each opcode
 * is assumed to be expanded with the given VECE.
 *
 * For debugging, we want to validate this array.  Therefore, when
 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
 * will validate that their opcode is present in the list.
 */
  54#ifdef CONFIG_DEBUG_TCG
  55void tcg_assert_listed_vecop(TCGOpcode op)
  56{
  57    const TCGOpcode *p = tcg_ctx->vecop_list;
  58    if (p) {
  59        for (; *p; ++p) {
  60            if (*p == op) {
  61                return;
  62            }
  63        }
  64        g_assert_not_reached();
  65    }
  66}
  67#endif
  68
  69bool tcg_can_emit_vecop_list(const TCGOpcode *list,
  70                             TCGType type, unsigned vece)
  71{
  72    if (list == NULL) {
  73        return true;
  74    }
  75
  76    for (; *list; ++list) {
  77        TCGOpcode opc = *list;
  78
  79#ifdef CONFIG_DEBUG_TCG
  80        switch (opc) {
  81        case INDEX_op_and_vec:
  82        case INDEX_op_or_vec:
  83        case INDEX_op_xor_vec:
  84        case INDEX_op_mov_vec:
  85        case INDEX_op_dup_vec:
  86        case INDEX_op_dupi_vec:
  87        case INDEX_op_dup2_vec:
  88        case INDEX_op_ld_vec:
  89        case INDEX_op_st_vec:
  90        case INDEX_op_bitsel_vec:
  91            /* These opcodes are mandatory and should not be listed.  */
  92            g_assert_not_reached();
  93        case INDEX_op_not_vec:
  94            /* These opcodes have generic expansions using the above.  */
  95            g_assert_not_reached();
  96        default:
  97            break;
  98        }
  99#endif
 100
 101        if (tcg_can_emit_vec_op(opc, type, vece)) {
 102            continue;
 103        }
 104
 105        /*
 106         * The opcode list is created by front ends based on what they
 107         * actually invoke.  We must mirror the logic in the routines
 108         * below for generic expansions using other opcodes.
 109         */
 110        switch (opc) {
 111        case INDEX_op_neg_vec:
 112            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
 113                continue;
 114            }
 115            break;
 116        case INDEX_op_abs_vec:
 117            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
 118                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
 119                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
 120                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
 121                continue;
 122            }
 123            break;
 124        case INDEX_op_cmpsel_vec:
 125        case INDEX_op_smin_vec:
 126        case INDEX_op_smax_vec:
 127        case INDEX_op_umin_vec:
 128        case INDEX_op_umax_vec:
 129            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
 130                continue;
 131            }
 132            break;
 133        default:
 134            break;
 135        }
 136        return false;
 137    }
 138    return true;
 139}
 140
 141void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
 142{
 143    TCGOp *op = tcg_emit_op(opc);
 144    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 145    TCGOP_VECE(op) = vece;
 146    op->args[0] = r;
 147    op->args[1] = a;
 148}
 149
 150void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
 151               TCGArg r, TCGArg a, TCGArg b)
 152{
 153    TCGOp *op = tcg_emit_op(opc);
 154    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 155    TCGOP_VECE(op) = vece;
 156    op->args[0] = r;
 157    op->args[1] = a;
 158    op->args[2] = b;
 159}
 160
 161void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
 162               TCGArg r, TCGArg a, TCGArg b, TCGArg c)
 163{
 164    TCGOp *op = tcg_emit_op(opc);
 165    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 166    TCGOP_VECE(op) = vece;
 167    op->args[0] = r;
 168    op->args[1] = a;
 169    op->args[2] = b;
 170    op->args[3] = c;
 171}
 172
 173static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
 174                      TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
 175{
 176    TCGOp *op = tcg_emit_op(opc);
 177    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 178    TCGOP_VECE(op) = vece;
 179    op->args[0] = r;
 180    op->args[1] = a;
 181    op->args[2] = b;
 182    op->args[3] = c;
 183    op->args[4] = d;
 184    op->args[5] = e;
 185}
 186
 187static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
 188{
 189    TCGTemp *rt = tcgv_vec_temp(r);
 190    TCGTemp *at = tcgv_vec_temp(a);
 191    TCGType type = rt->base_type;
 192
 193    /* Must enough inputs for the output.  */
 194    tcg_debug_assert(at->base_type >= type);
 195    vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
 196}
 197
 198static void vec_gen_op3(TCGOpcode opc, unsigned vece,
 199                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
 200{
 201    TCGTemp *rt = tcgv_vec_temp(r);
 202    TCGTemp *at = tcgv_vec_temp(a);
 203    TCGTemp *bt = tcgv_vec_temp(b);
 204    TCGType type = rt->base_type;
 205
 206    /* Must enough inputs for the output.  */
 207    tcg_debug_assert(at->base_type >= type);
 208    tcg_debug_assert(bt->base_type >= type);
 209    vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
 210}
 211
 212void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
 213{
 214    if (r != a) {
 215        vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
 216    }
 217}
 218
 219#define MO_REG  (TCG_TARGET_REG_BITS == 64 ? MO_64 : MO_32)
 220
 221static void do_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a)
 222{
 223    TCGTemp *rt = tcgv_vec_temp(r);
 224    vec_gen_2(INDEX_op_dupi_vec, rt->base_type, vece, temp_arg(rt), a);
 225}
 226
 227TCGv_vec tcg_const_zeros_vec(TCGType type)
 228{
 229    TCGv_vec ret = tcg_temp_new_vec(type);
 230    do_dupi_vec(ret, MO_REG, 0);
 231    return ret;
 232}
 233
 234TCGv_vec tcg_const_ones_vec(TCGType type)
 235{
 236    TCGv_vec ret = tcg_temp_new_vec(type);
 237    do_dupi_vec(ret, MO_REG, -1);
 238    return ret;
 239}
 240
 241TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
 242{
 243    TCGTemp *t = tcgv_vec_temp(m);
 244    return tcg_const_zeros_vec(t->base_type);
 245}
 246
 247TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
 248{
 249    TCGTemp *t = tcgv_vec_temp(m);
 250    return tcg_const_ones_vec(t->base_type);
 251}
 252
 253void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
 254{
 255    if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) {
 256        do_dupi_vec(r, MO_32, a);
 257    } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) {
 258        do_dupi_vec(r, MO_64, a);
 259    } else {
 260        TCGv_i64 c = tcg_const_i64(a);
 261        tcg_gen_dup_i64_vec(MO_64, r, c);
 262        tcg_temp_free_i64(c);
 263    }
 264}
 265
 266void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a)
 267{
 268    do_dupi_vec(r, MO_REG, dup_const(MO_32, a));
 269}
 270
 271void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a)
 272{
 273    do_dupi_vec(r, MO_REG, dup_const(MO_16, a));
 274}
 275
 276void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
 277{
 278    do_dupi_vec(r, MO_REG, dup_const(MO_8, a));
 279}
 280
 281void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
 282{
 283    do_dupi_vec(r, MO_REG, dup_const(vece, a));
 284}
 285
 286void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
 287{
 288    TCGArg ri = tcgv_vec_arg(r);
 289    TCGTemp *rt = arg_temp(ri);
 290    TCGType type = rt->base_type;
 291
 292    if (TCG_TARGET_REG_BITS == 64) {
 293        TCGArg ai = tcgv_i64_arg(a);
 294        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
 295    } else if (vece == MO_64) {
 296        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
 297        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
 298        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
 299    } else {
 300        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
 301        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
 302    }
 303}
 304
 305void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
 306{
 307    TCGArg ri = tcgv_vec_arg(r);
 308    TCGArg ai = tcgv_i32_arg(a);
 309    TCGTemp *rt = arg_temp(ri);
 310    TCGType type = rt->base_type;
 311
 312    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
 313}
 314
 315void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
 316                         tcg_target_long ofs)
 317{
 318    TCGArg ri = tcgv_vec_arg(r);
 319    TCGArg bi = tcgv_ptr_arg(b);
 320    TCGTemp *rt = arg_temp(ri);
 321    TCGType type = rt->base_type;
 322
 323    vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
 324}
 325
 326static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
 327{
 328    TCGArg ri = tcgv_vec_arg(r);
 329    TCGArg bi = tcgv_ptr_arg(b);
 330    TCGTemp *rt = arg_temp(ri);
 331    TCGType type = rt->base_type;
 332
 333    vec_gen_3(opc, type, 0, ri, bi, o);
 334}
 335
 336void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
 337{
 338    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
 339}
 340
 341void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
 342{
 343    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
 344}
 345
 346void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
 347{
 348    TCGArg ri = tcgv_vec_arg(r);
 349    TCGArg bi = tcgv_ptr_arg(b);
 350    TCGTemp *rt = arg_temp(ri);
 351    TCGType type = rt->base_type;
 352
 353    tcg_debug_assert(low_type >= TCG_TYPE_V64);
 354    tcg_debug_assert(low_type <= type);
 355    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
 356}
 357
 358void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 359{
 360    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
 361}
 362
 363void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 364{
 365    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
 366}
 367
 368void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 369{
 370    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
 371}
 372
 373void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 374{
 375    if (TCG_TARGET_HAS_andc_vec) {
 376        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
 377    } else {
 378        TCGv_vec t = tcg_temp_new_vec_matching(r);
 379        tcg_gen_not_vec(0, t, b);
 380        tcg_gen_and_vec(0, r, a, t);
 381        tcg_temp_free_vec(t);
 382    }
 383}
 384
 385void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 386{
 387    if (TCG_TARGET_HAS_orc_vec) {
 388        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
 389    } else {
 390        TCGv_vec t = tcg_temp_new_vec_matching(r);
 391        tcg_gen_not_vec(0, t, b);
 392        tcg_gen_or_vec(0, r, a, t);
 393        tcg_temp_free_vec(t);
 394    }
 395}
 396
 397void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 398{
 399    /* TODO: Add TCG_TARGET_HAS_nand_vec when adding a backend supports it. */
 400    tcg_gen_and_vec(0, r, a, b);
 401    tcg_gen_not_vec(0, r, r);
 402}
 403
 404void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 405{
 406    /* TODO: Add TCG_TARGET_HAS_nor_vec when adding a backend supports it. */
 407    tcg_gen_or_vec(0, r, a, b);
 408    tcg_gen_not_vec(0, r, r);
 409}
 410
 411void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 412{
 413    /* TODO: Add TCG_TARGET_HAS_eqv_vec when adding a backend supports it. */
 414    tcg_gen_xor_vec(0, r, a, b);
 415    tcg_gen_not_vec(0, r, r);
 416}
 417
 418static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
 419{
 420    TCGTemp *rt = tcgv_vec_temp(r);
 421    TCGTemp *at = tcgv_vec_temp(a);
 422    TCGArg ri = temp_arg(rt);
 423    TCGArg ai = temp_arg(at);
 424    TCGType type = rt->base_type;
 425    int can;
 426
 427    tcg_debug_assert(at->base_type >= type);
 428    tcg_assert_listed_vecop(opc);
 429    can = tcg_can_emit_vec_op(opc, type, vece);
 430    if (can > 0) {
 431        vec_gen_2(opc, type, vece, ri, ai);
 432    } else if (can < 0) {
 433        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 434        tcg_expand_vec_op(opc, type, vece, ri, ai);
 435        tcg_swap_vecop_list(hold_list);
 436    } else {
 437        return false;
 438    }
 439    return true;
 440}
 441
 442void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
 443{
 444    const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 445
 446    if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
 447        TCGv_vec t = tcg_const_ones_vec_matching(r);
 448        tcg_gen_xor_vec(0, r, a, t);
 449        tcg_temp_free_vec(t);
 450    }
 451    tcg_swap_vecop_list(hold_list);
 452}
 453
 454void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
 455{
 456    const TCGOpcode *hold_list;
 457
 458    tcg_assert_listed_vecop(INDEX_op_neg_vec);
 459    hold_list = tcg_swap_vecop_list(NULL);
 460
 461    if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
 462        TCGv_vec t = tcg_const_zeros_vec_matching(r);
 463        tcg_gen_sub_vec(vece, r, t, a);
 464        tcg_temp_free_vec(t);
 465    }
 466    tcg_swap_vecop_list(hold_list);
 467}
 468
 469void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
 470{
 471    const TCGOpcode *hold_list;
 472
 473    tcg_assert_listed_vecop(INDEX_op_abs_vec);
 474    hold_list = tcg_swap_vecop_list(NULL);
 475
 476    if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
 477        TCGType type = tcgv_vec_temp(r)->base_type;
 478        TCGv_vec t = tcg_temp_new_vec(type);
 479
 480        tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
 481        if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
 482            tcg_gen_neg_vec(vece, t, a);
 483            tcg_gen_smax_vec(vece, r, a, t);
 484        } else {
 485            if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
 486                tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
 487            } else {
 488                do_dupi_vec(t, MO_REG, 0);
 489                tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a, t);
 490            }
 491            tcg_gen_xor_vec(vece, r, a, t);
 492            tcg_gen_sub_vec(vece, r, r, t);
 493        }
 494
 495        tcg_temp_free_vec(t);
 496    }
 497    tcg_swap_vecop_list(hold_list);
 498}
 499
 500static void do_shifti(TCGOpcode opc, unsigned vece,
 501                      TCGv_vec r, TCGv_vec a, int64_t i)
 502{
 503    TCGTemp *rt = tcgv_vec_temp(r);
 504    TCGTemp *at = tcgv_vec_temp(a);
 505    TCGArg ri = temp_arg(rt);
 506    TCGArg ai = temp_arg(at);
 507    TCGType type = rt->base_type;
 508    int can;
 509
 510    tcg_debug_assert(at->base_type == type);
 511    tcg_debug_assert(i >= 0 && i < (8 << vece));
 512    tcg_assert_listed_vecop(opc);
 513
 514    if (i == 0) {
 515        tcg_gen_mov_vec(r, a);
 516        return;
 517    }
 518
 519    can = tcg_can_emit_vec_op(opc, type, vece);
 520    if (can > 0) {
 521        vec_gen_3(opc, type, vece, ri, ai, i);
 522    } else {
 523        /* We leave the choice of expansion via scalar or vector shift
 524           to the target.  Often, but not always, dupi can feed a vector
 525           shift easier than a scalar.  */
 526        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 527        tcg_debug_assert(can < 0);
 528        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
 529        tcg_swap_vecop_list(hold_list);
 530    }
 531}
 532
 533void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 534{
 535    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
 536}
 537
 538void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 539{
 540    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
 541}
 542
 543void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 544{
 545    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
 546}
 547
 548void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
 549                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
 550{
 551    TCGTemp *rt = tcgv_vec_temp(r);
 552    TCGTemp *at = tcgv_vec_temp(a);
 553    TCGTemp *bt = tcgv_vec_temp(b);
 554    TCGArg ri = temp_arg(rt);
 555    TCGArg ai = temp_arg(at);
 556    TCGArg bi = temp_arg(bt);
 557    TCGType type = rt->base_type;
 558    int can;
 559
 560    tcg_debug_assert(at->base_type >= type);
 561    tcg_debug_assert(bt->base_type >= type);
 562    tcg_assert_listed_vecop(INDEX_op_cmp_vec);
 563    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
 564    if (can > 0) {
 565        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
 566    } else {
 567        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 568        tcg_debug_assert(can < 0);
 569        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
 570        tcg_swap_vecop_list(hold_list);
 571    }
 572}
 573
 574static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
 575                   TCGv_vec b, TCGOpcode opc)
 576{
 577    TCGTemp *rt = tcgv_vec_temp(r);
 578    TCGTemp *at = tcgv_vec_temp(a);
 579    TCGTemp *bt = tcgv_vec_temp(b);
 580    TCGArg ri = temp_arg(rt);
 581    TCGArg ai = temp_arg(at);
 582    TCGArg bi = temp_arg(bt);
 583    TCGType type = rt->base_type;
 584    int can;
 585
 586    tcg_debug_assert(at->base_type >= type);
 587    tcg_debug_assert(bt->base_type >= type);
 588    tcg_assert_listed_vecop(opc);
 589    can = tcg_can_emit_vec_op(opc, type, vece);
 590    if (can > 0) {
 591        vec_gen_3(opc, type, vece, ri, ai, bi);
 592    } else if (can < 0) {
 593        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 594        tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
 595        tcg_swap_vecop_list(hold_list);
 596    } else {
 597        return false;
 598    }
 599    return true;
 600}
 601
 602static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
 603                          TCGv_vec b, TCGOpcode opc)
 604{
 605    bool ok = do_op3(vece, r, a, b, opc);
 606    tcg_debug_assert(ok);
 607}
 608
 609void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 610{
 611    do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
 612}
 613
 614void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 615{
 616    do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
 617}
 618
 619void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 620{
 621    do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
 622}
 623
 624void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 625{
 626    do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
 627}
 628
 629void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 630{
 631    do_op3_nofail(vece, r, a, b, INDEX_op_usadd_vec);
 632}
 633
 634void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 635{
 636    do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
 637}
 638
 639void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 640{
 641    do_op3_nofail(vece, r, a, b, INDEX_op_ussub_vec);
 642}
 643
 644static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
 645                      TCGv_vec b, TCGOpcode opc, TCGCond cond)
 646{
 647    if (!do_op3(vece, r, a, b, opc)) {
 648        tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
 649    }
 650}
 651
 652void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 653{
 654    do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
 655}
 656
 657void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 658{
 659    do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
 660}
 661
 662void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 663{
 664    do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
 665}
 666
 667void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 668{
 669    do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
 670}
 671
 672void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 673{
 674    do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
 675}
 676
 677void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 678{
 679    do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
 680}
 681
 682void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 683{
 684    do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
 685}
 686
 687static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
 688                      TCGv_i32 s, TCGOpcode opc_s, TCGOpcode opc_v)
 689{
 690    TCGTemp *rt = tcgv_vec_temp(r);
 691    TCGTemp *at = tcgv_vec_temp(a);
 692    TCGTemp *st = tcgv_i32_temp(s);
 693    TCGArg ri = temp_arg(rt);
 694    TCGArg ai = temp_arg(at);
 695    TCGArg si = temp_arg(st);
 696    TCGType type = rt->base_type;
 697    const TCGOpcode *hold_list;
 698    int can;
 699
 700    tcg_debug_assert(at->base_type >= type);
 701    tcg_assert_listed_vecop(opc_s);
 702    hold_list = tcg_swap_vecop_list(NULL);
 703
 704    can = tcg_can_emit_vec_op(opc_s, type, vece);
 705    if (can > 0) {
 706        vec_gen_3(opc_s, type, vece, ri, ai, si);
 707    } else if (can < 0) {
 708        tcg_expand_vec_op(opc_s, type, vece, ri, ai, si);
 709    } else {
 710        TCGv_vec vec_s = tcg_temp_new_vec(type);
 711
 712        if (vece == MO_64) {
 713            TCGv_i64 s64 = tcg_temp_new_i64();
 714            tcg_gen_extu_i32_i64(s64, s);
 715            tcg_gen_dup_i64_vec(MO_64, vec_s, s64);
 716            tcg_temp_free_i64(s64);
 717        } else {
 718            tcg_gen_dup_i32_vec(vece, vec_s, s);
 719        }
 720        do_op3_nofail(vece, r, a, vec_s, opc_v);
 721        tcg_temp_free_vec(vec_s);
 722    }
 723    tcg_swap_vecop_list(hold_list);
 724}
 725
 726void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
 727{
 728    do_shifts(vece, r, a, b, INDEX_op_shls_vec, INDEX_op_shlv_vec);
 729}
 730
 731void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
 732{
 733    do_shifts(vece, r, a, b, INDEX_op_shrs_vec, INDEX_op_shrv_vec);
 734}
 735
 736void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
 737{
 738    do_shifts(vece, r, a, b, INDEX_op_sars_vec, INDEX_op_sarv_vec);
 739}
 740
 741void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
 742                        TCGv_vec b, TCGv_vec c)
 743{
 744    TCGTemp *rt = tcgv_vec_temp(r);
 745    TCGTemp *at = tcgv_vec_temp(a);
 746    TCGTemp *bt = tcgv_vec_temp(b);
 747    TCGTemp *ct = tcgv_vec_temp(c);
 748    TCGType type = rt->base_type;
 749
 750    tcg_debug_assert(at->base_type >= type);
 751    tcg_debug_assert(bt->base_type >= type);
 752    tcg_debug_assert(ct->base_type >= type);
 753
 754    if (TCG_TARGET_HAS_bitsel_vec) {
 755        vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
 756                  temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
 757    } else {
 758        TCGv_vec t = tcg_temp_new_vec(type);
 759        tcg_gen_and_vec(MO_8, t, a, b);
 760        tcg_gen_andc_vec(MO_8, r, c, a);
 761        tcg_gen_or_vec(MO_8, r, r, t);
 762        tcg_temp_free_vec(t);
 763    }
 764}
 765
 766void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
 767                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
 768{
 769    TCGTemp *rt = tcgv_vec_temp(r);
 770    TCGTemp *at = tcgv_vec_temp(a);
 771    TCGTemp *bt = tcgv_vec_temp(b);
 772    TCGTemp *ct = tcgv_vec_temp(c);
 773    TCGTemp *dt = tcgv_vec_temp(d);
 774    TCGArg ri = temp_arg(rt);
 775    TCGArg ai = temp_arg(at);
 776    TCGArg bi = temp_arg(bt);
 777    TCGArg ci = temp_arg(ct);
 778    TCGArg di = temp_arg(dt);
 779    TCGType type = rt->base_type;
 780    const TCGOpcode *hold_list;
 781    int can;
 782
 783    tcg_debug_assert(at->base_type >= type);
 784    tcg_debug_assert(bt->base_type >= type);
 785    tcg_debug_assert(ct->base_type >= type);
 786    tcg_debug_assert(dt->base_type >= type);
 787
 788    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
 789    hold_list = tcg_swap_vecop_list(NULL);
 790    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);
 791
 792    if (can > 0) {
 793        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
 794    } else if (can < 0) {
 795        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
 796                          ri, ai, bi, ci, di, cond);
 797    } else {
 798        TCGv_vec t = tcg_temp_new_vec(type);
 799        tcg_gen_cmp_vec(cond, vece, t, a, b);
 800        tcg_gen_bitsel_vec(vece, r, t, c, d);
 801        tcg_temp_free_vec(t);
 802    }
 803    tcg_swap_vecop_list(hold_list);
 804}
 805