qemu/tcg/tcg-op-vec.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2018 Linaro, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "tcg/tcg.h"
#include "tcg/tcg-op.h"
#include "tcg/tcg-mo.h"

/* Reduce the number of ifdefs below.  This assumes that all uses of
   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
   the compiler can eliminate.  */
#if TCG_TARGET_REG_BITS == 64
extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
#define TCGV_LOW  TCGV_LOW_link_error
#define TCGV_HIGH TCGV_HIGH_link_error
#endif

/*
 * Vector optional opcode tracking.
 * Except for the basic logical operations (and, or, xor), and
 * data movement (mov, ld, st, dupi), many vector opcodes are
 * optional and may not be supported on the host.  Thank Intel
 * for the irregularity in their instruction set.
 *
 * The gvec expanders allow custom vector operations to be composed,
 * generally via the .fniv callback in the GVecGen* structures.  At
 * the same time, in deciding whether to use this hook we need to
 * know if the host supports the required operations.  This is
 * presented as an array of opcodes, terminated by 0.  Each opcode
 * is assumed to be expanded with the given VECE.
 *
 * For debugging, we want to validate this array.  Therefore, when
 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
 * will validate that their opcode is present in the list.
 */
#ifdef CONFIG_DEBUG_TCG
void tcg_assert_listed_vecop(TCGOpcode op)
{
    const TCGOpcode *p = tcg_ctx->vecop_list;
    if (p) {
        for (; *p; ++p) {
            if (*p == op) {
                return;
            }
        }
        g_assert_not_reached();
    }
}
#endif
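
/*
 * Illustrative sketch (not part of the original file): a front end that
 * composes an expansion from shifts would install exactly the opcodes it
 * emits.  The names below are hypothetical:
 *
 *     static const TCGOpcode sketch_list[] = {
 *         INDEX_op_shli_vec, INDEX_op_sari_vec, 0
 *     };
 *
 *     const TCGOpcode *hold = tcg_swap_vecop_list(sketch_list);
 *     tcg_gen_shli_vec(MO_32, t, a, 3);    // listed: assertion passes
 *     tcg_gen_sari_vec(MO_32, t, t, 3);    // listed: assertion passes
 *     tcg_swap_vecop_list(hold);
 *
 * Under CONFIG_DEBUG_TCG, emitting an opcode missing from the installed
 * list trips tcg_assert_listed_vecop() above.
 */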

bool tcg_can_emit_vecop_list(const TCGOpcode *list,
                             TCGType type, unsigned vece)
{
    if (list == NULL) {
        return true;
    }

    for (; *list; ++list) {
        TCGOpcode opc = *list;

#ifdef CONFIG_DEBUG_TCG
        switch (opc) {
        case INDEX_op_and_vec:
        case INDEX_op_or_vec:
        case INDEX_op_xor_vec:
        case INDEX_op_mov_vec:
        case INDEX_op_dup_vec:
        case INDEX_op_dup2_vec:
        case INDEX_op_ld_vec:
        case INDEX_op_st_vec:
        case INDEX_op_bitsel_vec:
            /* These opcodes are mandatory and should not be listed.  */
            g_assert_not_reached();
        case INDEX_op_not_vec:
            /* These opcodes have generic expansions using the above.  */
            g_assert_not_reached();
        default:
            break;
        }
#endif

        if (tcg_can_emit_vec_op(opc, type, vece)) {
            continue;
        }

        /*
         * The opcode list is created by front ends based on what they
         * actually invoke.  We must mirror the logic in the routines
         * below for generic expansions using other opcodes.
         */
        switch (opc) {
        case INDEX_op_neg_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_abs_vec:
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
                continue;
            }
            break;
        case INDEX_op_cmpsel_vec:
        case INDEX_op_smin_vec:
        case INDEX_op_smax_vec:
        case INDEX_op_umin_vec:
        case INDEX_op_umax_vec:
            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        default:
            break;
        }
        return false;
    }
    return true;
}
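
/*
 * Sketch of the consumer side (abridged from the gvec expander pattern;
 * treat the details as illustrative rather than authoritative): a GVecGen*
 * structure publishes the opcodes its .fniv callback will emit, e.g.
 *
 *     static const TCGOpcode vecop_list[] = { INDEX_op_neg_vec, 0 };
 *     static const GVecGen2 g = {
 *         .fni8 = tcg_gen_neg_i64,        // 64-bit integer fallback
 *         .fniv = tcg_gen_neg_vec,        // preferred vector form
 *         .fno = gen_helper_gvec_neg64,   // out-of-line helper fallback
 *         .opt_opc = vecop_list,
 *         .vece = MO_64,
 *     };
 *
 * The gvec expansion then consults tcg_can_emit_vecop_list() to decide
 * whether .fniv may be used for a given TCGType and element size.
 */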

void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
}

void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
}

void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
               TCGArg r, TCGArg a, TCGArg b, TCGArg c)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
}

static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
                      TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
{
    TCGOp *op = tcg_emit_op(opc);
    TCGOP_VECL(op) = type - TCG_TYPE_V64;
    TCGOP_VECE(op) = vece;
    op->args[0] = r;
    op->args[1] = a;
    op->args[2] = b;
    op->args[3] = c;
    op->args[4] = d;
    op->args[5] = e;
}

static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
}

static void vec_gen_op3(TCGOpcode opc, unsigned vece,
                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
}

void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
{
    if (r != a) {
        vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
    }
}

TCGv_vec tcg_const_zeros_vec(TCGType type)
{
    TCGv_vec ret = tcg_temp_new_vec(type);
    tcg_gen_dupi_vec(MO_64, ret, 0);
    return ret;
}

TCGv_vec tcg_const_ones_vec(TCGType type)
{
    TCGv_vec ret = tcg_temp_new_vec(type);
    tcg_gen_dupi_vec(MO_64, ret, -1);
    return ret;
}

TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_zeros_vec(t->base_type);
}

TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
{
    TCGTemp *t = tcgv_vec_temp(m);
    return tcg_const_ones_vec(t->base_type);
}

void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    tcg_gen_mov_vec(r, tcg_constant_vec(rt->base_type, vece, a));
}
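
/*
 * Usage sketch (illustrative): splat a constant into every lane, either
 * through a temporary or directly as a constant operand:
 *
 *     TCGv_vec t = tcg_temp_new_vec(TCG_TYPE_V128);
 *     tcg_gen_dupi_vec(MO_8, t, 0x80);                         // t = 16 x 0x80
 *
 *     TCGv_vec c = tcg_constant_vec(TCG_TYPE_V128, MO_8, 0x80);  // no mov
 */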

void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    if (TCG_TARGET_REG_BITS == 64) {
        TCGArg ai = tcgv_i64_arg(a);
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    } else if (vece == MO_64) {
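        /* 32-bit host: the i64 arrives as two i32 halves; dup2 joins them. */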
        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
    } else {
        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    }
}

void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg ai = tcgv_i32_arg(a);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
}

void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
                         tcg_target_long ofs)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
}

static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    vec_gen_3(opc, type, 0, ri, bi, o);
}

void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
}

void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
}

void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGArg bi = tcgv_ptr_arg(b);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    tcg_debug_assert(low_type >= TCG_TYPE_V64);
    tcg_debug_assert(low_type <= type);
    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
}
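
/*
 * Usage sketch (the env field here is hypothetical, for illustration
 * only): store just the low 64 bits of a 128-bit temporary back to the
 * CPU state:
 *
 *     tcg_gen_stl_vec(v, cpu_env, offsetof(CPUArchState, some_vreg),
 *                     TCG_TYPE_V64);
 */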

void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
}

void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
}

void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
}

void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_andc_vec) {
        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_and_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (TCG_TARGET_HAS_orc_vec) {
        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
    } else {
        TCGv_vec t = tcg_temp_new_vec_matching(r);
        tcg_gen_not_vec(0, t, b);
        tcg_gen_or_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nand_vec when adding a backend that supports it. */
    tcg_gen_and_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nor_vec when adding a backend that supports it. */
    tcg_gen_or_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_eqv_vec when adding a backend that supports it. */
    tcg_gen_xor_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

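/*
 * Note on the tcg_can_emit_vec_op() contract relied on below: a positive
 * return means the host supports the opcode directly; a negative return
 * means the backend expands it via tcg_expand_vec_op(); zero means it is
 * unavailable, and the caller must compose a fallback (or fail).
 */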
static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_2(opc, type, vece, ri, ai);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
        TCGv_vec t = tcg_const_ones_vec_matching(r);
        tcg_gen_xor_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_neg_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
        TCGv_vec t = tcg_const_zeros_vec_matching(r);
        tcg_gen_sub_vec(vece, r, t, a);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_abs_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
        TCGType type = tcgv_vec_temp(r)->base_type;
        TCGv_vec t = tcg_temp_new_vec(type);

        tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
        if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
            tcg_gen_neg_vec(vece, t, a);
            tcg_gen_smax_vec(vece, r, a, t);
        } else {
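            /*
             * Build a sign mask t = (a < 0 ? -1 : 0) via arithmetic
             * shift or compare; then abs(a) = (a ^ t) - t.
             */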
            if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
                tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
            } else {
                tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a,
                                tcg_constant_vec(type, vece, 0));
            }
            tcg_gen_xor_vec(vece, r, a, t);
            tcg_gen_sub_vec(vece, r, r, t);
        }

        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}

static void do_shifti(TCGOpcode opc, unsigned vece,
                      TCGv_vec r, TCGv_vec a, int64_t i)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type == type);
    tcg_debug_assert(i >= 0 && i < (8 << vece));
    tcg_assert_listed_vecop(opc);

    if (i == 0) {
        tcg_gen_mov_vec(r, a);
        return;
    }

    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, i);
    } else {
        /* We leave the choice of expansion via scalar or vector shift
           to the target.  Often, but not always, dupi can feed a vector
           shift more easily than a scalar.  */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
}

void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
}

void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}

void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
}

void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    int bits = 8 << vece;
    tcg_debug_assert(i >= 0 && i < bits);
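    /* Right rotation by i is left rotation by bits - i (mod bits). */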
    do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
}

void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(INDEX_op_cmp_vec);
    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
    if (can > 0) {
        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    } else {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
        tcg_swap_vecop_list(hold_list);
    }
}
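
/*
 * The comparison yields a per-element mask: all ones where the condition
 * holds, all zeros elsewhere.  An illustrative sketch (names are local to
 * this example) selecting the per-lane maximum:
 *
 *     tcg_gen_cmp_vec(TCG_COND_GT, MO_32, m, a, b);  // m = (a > b) ? -1 : 0
 *     tcg_gen_bitsel_vec(MO_32, r, m, a, b);         // r = (m & a) | (~m & b)
 */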

static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
                   TCGv_vec b, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, bi);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}

static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
                          TCGv_vec b, TCGOpcode opc)
{
    bool ok = do_op3(vece, r, a, b, opc);
    tcg_debug_assert(ok);
}

void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
}

void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
}

void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
}

void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
}

void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_usadd_vec);
}

void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
}

void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ussub_vec);
}

static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_vec b, TCGOpcode opc, TCGCond cond)
{
    if (!do_op3(vece, r, a, b, opc)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
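        /* Fall back on compare-and-select, e.g. smin(a, b) = (a < b ? a : b). */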
        tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
        tcg_swap_vecop_list(hold_list);
    }
}

void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
}

void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
}

void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
}

void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
}

void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
}

void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
}

void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
}

void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
}

void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
}

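/*
 * The *_shifts forms below shift every element by one i32 scalar count,
 * unlike the *_shiftv forms above, where each element is shifted by the
 * count held in the corresponding element of the second vector operand.
 */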
static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_i32 s, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *st = tcgv_i32_temp(s);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg si = temp_arg(st);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, si);
    } else if (can < 0) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, si);
        tcg_swap_vecop_list(hold_list);
    } else {
        g_assert_not_reached();
    }
}

void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shls_vec);
}

void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
}

void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_sars_vec);
}

void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
{
    do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
}

void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
                        TCGv_vec b, TCGv_vec c)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGType type = rt->base_type;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);

    if (TCG_TARGET_HAS_bitsel_vec) {
        vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
                  temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
    } else {
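        /* Compose r = (a & b) | (~a & c); the temp lets r alias any input. */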
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_and_vec(MO_8, t, a, b);
        tcg_gen_andc_vec(MO_8, r, c, a);
        tcg_gen_or_vec(MO_8, r, r, t);
        tcg_temp_free_vec(t);
    }
}

void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGTemp *dt = tcgv_vec_temp(d);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGArg ci = temp_arg(ct);
    TCGArg di = temp_arg(dt);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);
    tcg_debug_assert(dt->base_type >= type);

    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
    hold_list = tcg_swap_vecop_list(NULL);
    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);

    if (can > 0) {
        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
    } else if (can < 0) {
        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
                          ri, ai, bi, ci, di, cond);
    } else {
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_cmp_vec(cond, vece, t, a, b);
        tcg_gen_bitsel_vec(vece, r, t, c, d);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}