qemu/tcg/tcg-op-vec.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2018 Linaro, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "tcg/tcg.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-mo.h"

/* Reduce the number of ifdefs below.  This assumes that all uses of
   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
   the compiler can eliminate.  */
#if TCG_TARGET_REG_BITS == 64
extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_LOW  TCGV_LOW_link_error
#define TCGV_HIGH TCGV_HIGH_link_error
#endif

/*
 * Vector optional opcode tracking.
 * Except for the basic logical operations (and, or, xor), and
 * data movement (mov, ld, st, dupi), many vector opcodes are
 * optional and may not be supported on the host.  Thank Intel
 * for the irregularity in their instruction set.
 *
 * The gvec expanders allow custom vector operations to be composed,
 * generally via the .fniv callback in the GVecGen* structures.  At
 * the same time, in deciding whether to use this hook we need to
 * know if the host supports the required operations.  This is
 * presented as an array of opcodes, terminated by 0.  Each opcode
 * is assumed to be expanded with the given VECE.
 *
 * For debugging, we want to validate this array.  Therefore, when
 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
 * will validate that their opcode is present in the list.
 */
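
/*
 * Illustrative sketch only (hypothetical front-end usage, not taken
 * from this file): the optional opcodes an expansion may emit are
 * declared in a zero-terminated array, which is installed around the
 * expansion and restored afterwards.
 *
 *     static const TCGOpcode neg_list[] = { INDEX_op_neg_vec, 0 };
 *
 *     const TCGOpcode *hold = tcg_swap_vecop_list(neg_list);
 *     tcg_gen_neg_vec(vece, r, a);    // checked against neg_list
 *     tcg_swap_vecop_list(hold);
 */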
#ifdef CONFIG_DEBUG_TCG
void tcg_assert_listed_vecop(TCGOpcode op)
{
    const TCGOpcode *p = tcg_ctx->vecop_list;
    if (p) {
        for (; *p; ++p) {
            if (*p == op) {
                return;
            }
        }
        g_assert_not_reached();
    }
}
#endif

bool tcg_can_emit_vecop_list(const TCGOpcode *list,
                             TCGType type, unsigned vece)
{
    if (list == NULL) {
        return true;
    }

    for (; *list; ++list) {
        TCGOpcode opc = *list;

#ifdef CONFIG_DEBUG_TCG
        switch (opc) {
        case INDEX_op_and_vec:
        case INDEX_op_or_vec:
        case INDEX_op_xor_vec:
        case INDEX_op_mov_vec:
        case INDEX_op_dup_vec:
        case INDEX_op_dupi_vec:
        case INDEX_op_dup2_vec:
        case INDEX_op_ld_vec:
        case INDEX_op_st_vec:
        case INDEX_op_bitsel_vec:
            /* These opcodes are mandatory and should not be listed.  */
            g_assert_not_reached();
        case INDEX_op_not_vec:
            /* These opcodes have generic expansions using the above.  */
            g_assert_not_reached();
        default:
            break;
        }
#endif

        if (tcg_can_emit_vec_op(opc, type, vece)) {
            continue;
        }

        /*
         * The opcode list is created by front ends based on what they
         * actually invoke.  We must mirror the logic in the routines
         * below for generic expansions using other opcodes.
         */
        switch (opc) {
        case INDEX_op_neg_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_abs_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
                continue;
            }
            break;
        case INDEX_op_cmpsel_vec:
        case INDEX_op_smin_vec:
        case INDEX_op_smax_vec:
        case INDEX_op_umin_vec:
        case INDEX_op_umax_vec:
            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        default:
            break;
        }
        return false;
    }
    return true;
}

void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
}

void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
}

void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b, TCGArg c)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
}

static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
                      TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
    op->args[4] = d;
    op->args[5] = e;
}

static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
}

static void vec_gen_op3(TCGOpcode opc, unsigned vece,
                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
}

void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
{
    if (r != a) {
        vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
    }
}

#define MO_REG  (TCG_TARGET_REG_BITS == 64 ? MO_64 : MO_32)

static void do_dupi_vec(TCGv_vec r, unsigned vece, TCGArg a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    vec_gen_2(INDEX_op_dupi_vec, rt->base_type, vece, temp_arg(rt), a);
}

TCGv_vec tcg_const_zeros_vec(TCGType type)
{
    TCGv_vec ret = tcg_temp_new_vec(type);
    do_dupi_vec(ret, MO_REG, 0);
    return ret;
}

TCGv_vec tcg_const_ones_vec(TCGType type)
{
    TCGv_vec ret = tcg_temp_new_vec(type);
    do_dupi_vec(ret, MO_REG, -1);
    return ret;
}

TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_zeros_vec(t->base_type);
}

TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_ones_vec(t->base_type);
}

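/*
 * Note the three cases below: on a 32-bit host, a constant whose two
 * 32-bit halves are equal (a == deposit64(a, 32, 32, a)) fits a MO_32
 * dupi; a constant that is the sign extension of its low 32 bits can
 * be passed as a MO_64 dupi immediate even when TCGArg is only 32 bits
 * wide; anything else is materialized in a TCGv_i64 and broadcast.
 */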
void tcg_gen_dup64i_vec(TCGv_vec r, uint64_t a)
{
    if (TCG_TARGET_REG_BITS == 32 && a == deposit64(a, 32, 32, a)) {
        do_dupi_vec(r, MO_32, a);
    } else if (TCG_TARGET_REG_BITS == 64 || a == (uint64_t)(int32_t)a) {
        do_dupi_vec(r, MO_64, a);
    } else {
        TCGv_i64 c = tcg_const_i64(a);
        tcg_gen_dup_i64_vec(MO_64, r, c);
        tcg_temp_free_i64(c);
    }
}

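/*
 * For the narrower dupi helpers below, dup_const() replicates the low
 * VECE-sized chunk of the constant across 64 bits, e.g.
 * dup_const(MO_16, 0x1234) == 0x1234123412341234ull; MO_REG then sizes
 * the resulting immediate to the host register width.
 */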
void tcg_gen_dup32i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_32, a));
}

void tcg_gen_dup16i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_16, a));
}

void tcg_gen_dup8i_vec(TCGv_vec r, uint32_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(MO_8, a));
}

void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
{
    do_dupi_vec(r, MO_REG, dup_const(vece, a));
}

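/*
 * Broadcast a 64-bit scalar.  On a 32-bit host the value lives in a
 * pair of 32-bit registers, so a MO_64 element dup must be expressed
 * as dup2_vec(lo, hi); narrower elements only need the low half.
 */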
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    if (TCG_TARGET_REG_BITS == 64) {
        TCGArg ai = tcgv_i64_arg(a);
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    } else if (vece == MO_64) {
        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
    } else {
        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    }
}

void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg ai = tcgv_i32_arg(a);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
}

void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
                         tcg_target_long ofs)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
}

static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(opc, type, 0, ri, bi, o);
}

void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
}

void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
}

void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    tcg_debug_assert(low_type >= TCG_TYPE_V64);
    tcg_debug_assert(low_type <= type);
    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
}

void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
}

void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
}

void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
}

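/*
 * The fallbacks below compose missing logical opcodes from the
 * mandatory ones: andc(a, b) == a & ~b, orc(a, b) == a | ~b, and the
 * nand/nor/eqv variants are the complement of and/or/xor.
 */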
void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_andc_vec) {
        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_and_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_orc_vec) {
        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_or_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nand_vec when adding a backend that supports it. */
    tcg_gen_and_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nor_vec when adding a backend that supports it. */
    tcg_gen_or_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_eqv_vec when adding a backend that supports it. */
    tcg_gen_xor_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

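/*
 * tcg_can_emit_vec_op() is a tri-state: > 0 means the host implements
 * the opcode directly, < 0 means the backend expands it via
 * tcg_expand_vec_op(), and 0 means unsupported, in which case the
 * caller must fall back to a generic expansion (or assert).
 */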
static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_2(opc, type, vece, ri, ai);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
        TCGv_vec t = tcg_const_ones_vec_matching(r);
        tcg_gen_xor_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_neg_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
        TCGv_vec t = tcg_const_zeros_vec_matching(r);
        tcg_gen_sub_vec(vece, r, t, a);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

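/*
 * Absolute value without a native opcode: prefer r = smax(a, -a).
 * Otherwise build a sign mask t that is all ones exactly where a is
 * negative (an arithmetic shift by element width - 1, or a LT-zero
 * compare) and compute r = (a ^ t) - t.  E.g. for a = -5: t = -1,
 * a ^ t = 4, and 4 - (-1) = 5; for a >= 0, t = 0 and r = a.
 */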
void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_abs_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
        TCGType type = tcgv_vec_temp(r)->base_type;
        TCGv_vec t = tcg_temp_new_vec(type);

        tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
        if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
            tcg_gen_neg_vec(vece, t, a);
            tcg_gen_smax_vec(vece, r, a, t);
        } else {
            if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
                tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
            } else {
                do_dupi_vec(t, MO_REG, 0);
                tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a, t);
            }
            tcg_gen_xor_vec(vece, r, a, t);
            tcg_gen_sub_vec(vece, r, r, t);
        }

        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

static void do_shifti(TCGOpcode opc, unsigned vece,
                      TCGv_vec r, TCGv_vec a, int64_t i)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type == type);
    tcg_debug_assert(i >= 0 && i < (8 << vece));
    tcg_assert_listed_vecop(opc);

    if (i == 0) {
        tcg_gen_mov_vec(r, a);
        return;
    }

    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, i);
    } else {
        /* We leave the choice of expansion via scalar or vector shift
           to the target.  Often, but not always, dupi can feed a vector
           shift more easily than a scalar.  */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
}

void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
}

void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}

void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
}

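/* Rotate right by i is rotate left by bits - i; -i & (bits - 1)
   computes that while also mapping i == 0 back to a rotate by 0.  */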
void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    int bits = 8 << vece;
    tcg_debug_assert(i >= 0 && i < bits);
    do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
}

void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(INDEX_op_cmp_vec);
    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
    if (can > 0) {
        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    } else {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
        tcg_swap_vecop_list(hold_list);
    }
}

static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
                   TCGv_vec b, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, bi);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
                          TCGv_vec b, TCGOpcode opc)
{
    bool ok = do_op3(vece, r, a, b, opc);
    tcg_debug_assert(ok);
}

void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
}

void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
}

void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
}

void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
}

void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_usadd_vec);
}

void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
}

void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ussub_vec);
}

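/*
 * Without a native min/max opcode, select between the two operands on
 * an ordering comparison: smin(a, b) == cmpsel(LT, a, b, a, b), i.e.
 * r = (a < b ? a : b) per element, and likewise for the others.
 */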
static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_vec b, TCGOpcode opc, TCGCond cond)
{
    if (!do_op3(vece, r, a, b, opc)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
}

void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
}

void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
}

void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
}

void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
}

void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
}

void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
}

void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
}

void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
}

static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_i32 s, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *st = tcgv_i32_temp(s);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg si = temp_arg(st);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, si);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, si);
        tcg_swap_vecop_list(hold_list);
    } else {
        g_assert_not_reached();
    }
}

void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shls_vec);
}

void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
}

void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_sars_vec);
}

void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
{
    do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
}

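/*
 * Bitwise select: per bit, r = (a & b) | (~a & c), i.e. take the bit
 * from b where the selector a has a 1 and from c where it has a 0.
 * The fallback composes exactly that from and/andc/or.
 */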
void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
                        TCGv_vec b, TCGv_vec c)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGType type = rt->base_type;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);

    if (TCG_TARGET_HAS_bitsel_vec) {
        vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
                  temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
    } else {
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_and_vec(MO_8, t, a, b);
        tcg_gen_andc_vec(MO_8, r, c, a);
        tcg_gen_or_vec(MO_8, r, r, t);
        tcg_temp_free_vec(t);
    }
}

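/*
 * Per-element conditional move: r = (a cond b ? c : d).  When neither
 * a native opcode nor a backend expansion exists, this is built from
 * cmp_vec (which yields an all-ones/all-zeroes mask per element) and
 * bitsel_vec.
 */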
void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGTemp *dt = tcgv_vec_temp(d);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGArg ci = temp_arg(ct);
    TCGArg di = temp_arg(dt);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);
    tcg_debug_assert(dt->base_type >= type);

    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
    hold_list = tcg_swap_vecop_list(NULL);
    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);

    if (can > 0) {
        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
    } else if (can < 0) {
        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
                          ri, ai, bi, ci, di, cond);
    } else {
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_cmp_vec(cond, vece, t, a, b);
        tcg_gen_bitsel_vec(vece, r, t, c, d);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}