qemu/tcg/tcg-op-vec.c
<<
>>
Prefs
   1/*
   2 * Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2018 Linaro, Inc.
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "tcg/tcg.h"
  22#include "tcg/tcg-op.h"
  23#include "tcg/tcg-mo.h"
  24
  25/* Reduce the number of ifdefs below.  This assumes that all uses of
  26   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
  27   the compiler can eliminate.  */
  28#if TCG_TARGET_REG_BITS == 64
  29extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
  30extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
  31#define TCGV_LOW  TCGV_LOW_link_error
  32#define TCGV_HIGH TCGV_HIGH_link_error
  33#endif
  34
  35/*
  36 * Vector optional opcode tracking.
  37 * Except for the basic logical operations (and, or, xor), and
  38 * data movement (mov, ld, st, dupi), many vector opcodes are
  39 * optional and may not be supported on the host.  Thank Intel
  40 * for the irregularity in their instruction set.
  41 *
  42 * The gvec expanders allow custom vector operations to be composed,
  43 * generally via the .fniv callback in the GVecGen* structures.  At
  44 * the same time, in deciding whether to use this hook we need to
  45 * know if the host supports the required operations.  This is
  46 * presented as an array of opcodes, terminated by 0.  Each opcode
  47 * is assumed to be expanded with the given VECE.
  48 *
  49 * For debugging, we want to validate this array.  Therefore, when
  50 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
  51 * will validate that their opcode is present in the list.
  52 */
  53#ifdef CONFIG_DEBUG_TCG
  54void tcg_assert_listed_vecop(TCGOpcode op)
  55{
  56    const TCGOpcode *p = tcg_ctx->vecop_list;
  57    if (p) {
  58        for (; *p; ++p) {
  59            if (*p == op) {
  60                return;
  61            }
  62        }
  63        g_assert_not_reached();
  64    }
  65}
  66#endif
  67
  68bool tcg_can_emit_vecop_list(const TCGOpcode *list,
  69                             TCGType type, unsigned vece)
  70{
  71    if (list == NULL) {
  72        return true;
  73    }
  74
  75    for (; *list; ++list) {
  76        TCGOpcode opc = *list;
  77
  78#ifdef CONFIG_DEBUG_TCG
  79        switch (opc) {
  80        case INDEX_op_and_vec:
  81        case INDEX_op_or_vec:
  82        case INDEX_op_xor_vec:
  83        case INDEX_op_mov_vec:
  84        case INDEX_op_dup_vec:
  85        case INDEX_op_dup2_vec:
  86        case INDEX_op_ld_vec:
  87        case INDEX_op_st_vec:
  88        case INDEX_op_bitsel_vec:
  89            /* These opcodes are mandatory and should not be listed.  */
  90            g_assert_not_reached();
  91        case INDEX_op_not_vec:
  92            /* These opcodes have generic expansions using the above.  */
  93            g_assert_not_reached();
  94        default:
  95            break;
  96        }
  97#endif
  98
  99        if (tcg_can_emit_vec_op(opc, type, vece)) {
 100            continue;
 101        }
 102
 103        /*
 104         * The opcode list is created by front ends based on what they
 105         * actually invoke.  We must mirror the logic in the routines
 106         * below for generic expansions using other opcodes.
 107         */
 108        switch (opc) {
 109        case INDEX_op_neg_vec:
 110            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
 111                continue;
 112            }
 113            break;
 114        case INDEX_op_abs_vec:
 115            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
 116                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
 117                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
 118                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
 119                continue;
 120            }
 121            break;
 122        case INDEX_op_usadd_vec:
 123            if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) ||
 124                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
 125                continue;
 126            }
 127            break;
 128        case INDEX_op_ussub_vec:
 129            if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) ||
 130                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
 131                continue;
 132            }
 133            break;
 134        case INDEX_op_cmpsel_vec:
 135        case INDEX_op_smin_vec:
 136        case INDEX_op_smax_vec:
 137        case INDEX_op_umin_vec:
 138        case INDEX_op_umax_vec:
 139            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
 140                continue;
 141            }
 142            break;
 143        default:
 144            break;
 145        }
 146        return false;
 147    }
 148    return true;
 149}
 150
 151void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
 152{
 153    TCGOp *op = tcg_emit_op(opc);
 154    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 155    TCGOP_VECE(op) = vece;
 156    op->args[0] = r;
 157    op->args[1] = a;
 158}
 159
 160void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
 161               TCGArg r, TCGArg a, TCGArg b)
 162{
 163    TCGOp *op = tcg_emit_op(opc);
 164    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 165    TCGOP_VECE(op) = vece;
 166    op->args[0] = r;
 167    op->args[1] = a;
 168    op->args[2] = b;
 169}
 170
 171void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
 172               TCGArg r, TCGArg a, TCGArg b, TCGArg c)
 173{
 174    TCGOp *op = tcg_emit_op(opc);
 175    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 176    TCGOP_VECE(op) = vece;
 177    op->args[0] = r;
 178    op->args[1] = a;
 179    op->args[2] = b;
 180    op->args[3] = c;
 181}
 182
 183static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
 184                      TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
 185{
 186    TCGOp *op = tcg_emit_op(opc);
 187    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 188    TCGOP_VECE(op) = vece;
 189    op->args[0] = r;
 190    op->args[1] = a;
 191    op->args[2] = b;
 192    op->args[3] = c;
 193    op->args[4] = d;
 194    op->args[5] = e;
 195}
 196
 197static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
 198{
 199    TCGTemp *rt = tcgv_vec_temp(r);
 200    TCGTemp *at = tcgv_vec_temp(a);
 201    TCGType type = rt->base_type;
 202
 203    /* Must enough inputs for the output.  */
 204    tcg_debug_assert(at->base_type >= type);
 205    vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
 206}
 207
 208static void vec_gen_op3(TCGOpcode opc, unsigned vece,
 209                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
 210{
 211    TCGTemp *rt = tcgv_vec_temp(r);
 212    TCGTemp *at = tcgv_vec_temp(a);
 213    TCGTemp *bt = tcgv_vec_temp(b);
 214    TCGType type = rt->base_type;
 215
 216    /* Must enough inputs for the output.  */
 217    tcg_debug_assert(at->base_type >= type);
 218    tcg_debug_assert(bt->base_type >= type);
 219    vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
 220}
 221
 222void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
 223{
 224    if (r != a) {
 225        vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
 226    }
 227}
 228
 229TCGv_vec tcg_const_zeros_vec(TCGType type)
 230{
 231    TCGv_vec ret = tcg_temp_new_vec(type);
 232    tcg_gen_dupi_vec(MO_64, ret, 0);
 233    return ret;
 234}
 235
 236TCGv_vec tcg_const_ones_vec(TCGType type)
 237{
 238    TCGv_vec ret = tcg_temp_new_vec(type);
 239    tcg_gen_dupi_vec(MO_64, ret, -1);
 240    return ret;
 241}
 242
 243TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
 244{
 245    TCGTemp *t = tcgv_vec_temp(m);
 246    return tcg_const_zeros_vec(t->base_type);
 247}
 248
 249TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
 250{
 251    TCGTemp *t = tcgv_vec_temp(m);
 252    return tcg_const_ones_vec(t->base_type);
 253}
 254
 255void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
 256{
 257    TCGTemp *rt = tcgv_vec_temp(r);
 258    tcg_gen_mov_vec(r, tcg_constant_vec(rt->base_type, vece, a));
 259}
 260
 261void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
 262{
 263    TCGArg ri = tcgv_vec_arg(r);
 264    TCGTemp *rt = arg_temp(ri);
 265    TCGType type = rt->base_type;
 266
 267    if (TCG_TARGET_REG_BITS == 64) {
 268        TCGArg ai = tcgv_i64_arg(a);
 269        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
 270    } else if (vece == MO_64) {
 271        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
 272        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
 273        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
 274    } else {
 275        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
 276        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
 277    }
 278}
 279
 280void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
 281{
 282    TCGArg ri = tcgv_vec_arg(r);
 283    TCGArg ai = tcgv_i32_arg(a);
 284    TCGTemp *rt = arg_temp(ri);
 285    TCGType type = rt->base_type;
 286
 287    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
 288}
 289
 290void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
 291                         tcg_target_long ofs)
 292{
 293    TCGArg ri = tcgv_vec_arg(r);
 294    TCGArg bi = tcgv_ptr_arg(b);
 295    TCGTemp *rt = arg_temp(ri);
 296    TCGType type = rt->base_type;
 297
 298    vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
 299}
 300
 301static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
 302{
 303    TCGArg ri = tcgv_vec_arg(r);
 304    TCGArg bi = tcgv_ptr_arg(b);
 305    TCGTemp *rt = arg_temp(ri);
 306    TCGType type = rt->base_type;
 307
 308    vec_gen_3(opc, type, 0, ri, bi, o);
 309}
 310
 311void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
 312{
 313    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
 314}
 315
 316void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
 317{
 318    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
 319}
 320
 321void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
 322{
 323    TCGArg ri = tcgv_vec_arg(r);
 324    TCGArg bi = tcgv_ptr_arg(b);
 325    TCGTemp *rt = arg_temp(ri);
 326    TCGType type = rt->base_type;
 327
 328    tcg_debug_assert(low_type >= TCG_TYPE_V64);
 329    tcg_debug_assert(low_type <= type);
 330    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
 331}
 332
 333void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 334{
 335    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
 336}
 337
 338void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 339{
 340    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
 341}
 342
 343void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 344{
 345    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
 346}
 347
 348void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 349{
 350    if (TCG_TARGET_HAS_andc_vec) {
 351        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
 352    } else {
 353        TCGv_vec t = tcg_temp_new_vec_matching(r);
 354        tcg_gen_not_vec(0, t, b);
 355        tcg_gen_and_vec(0, r, a, t);
 356        tcg_temp_free_vec(t);
 357    }
 358}
 359
 360void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 361{
 362    if (TCG_TARGET_HAS_orc_vec) {
 363        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
 364    } else {
 365        TCGv_vec t = tcg_temp_new_vec_matching(r);
 366        tcg_gen_not_vec(0, t, b);
 367        tcg_gen_or_vec(0, r, a, t);
 368        tcg_temp_free_vec(t);
 369    }
 370}
 371
 372void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 373{
 374    if (TCG_TARGET_HAS_nand_vec) {
 375        vec_gen_op3(INDEX_op_nand_vec, 0, r, a, b);
 376    } else {
 377        tcg_gen_and_vec(0, r, a, b);
 378        tcg_gen_not_vec(0, r, r);
 379    }
 380}
 381
 382void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 383{
 384    if (TCG_TARGET_HAS_nor_vec) {
 385        vec_gen_op3(INDEX_op_nor_vec, 0, r, a, b);
 386    } else {
 387        tcg_gen_or_vec(0, r, a, b);
 388        tcg_gen_not_vec(0, r, r);
 389    }
 390}
 391
 392void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 393{
 394    if (TCG_TARGET_HAS_eqv_vec) {
 395        vec_gen_op3(INDEX_op_eqv_vec, 0, r, a, b);
 396    } else {
 397        tcg_gen_xor_vec(0, r, a, b);
 398        tcg_gen_not_vec(0, r, r);
 399    }
 400}
 401
 402static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
 403{
 404    TCGTemp *rt = tcgv_vec_temp(r);
 405    TCGTemp *at = tcgv_vec_temp(a);
 406    TCGArg ri = temp_arg(rt);
 407    TCGArg ai = temp_arg(at);
 408    TCGType type = rt->base_type;
 409    int can;
 410
 411    tcg_debug_assert(at->base_type >= type);
 412    tcg_assert_listed_vecop(opc);
 413    can = tcg_can_emit_vec_op(opc, type, vece);
 414    if (can > 0) {
 415        vec_gen_2(opc, type, vece, ri, ai);
 416    } else if (can < 0) {
 417        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 418        tcg_expand_vec_op(opc, type, vece, ri, ai);
 419        tcg_swap_vecop_list(hold_list);
 420    } else {
 421        return false;
 422    }
 423    return true;
 424}
 425
 426void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
 427{
 428    const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 429
 430    if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
 431        TCGv_vec t = tcg_const_ones_vec_matching(r);
 432        tcg_gen_xor_vec(0, r, a, t);
 433        tcg_temp_free_vec(t);
 434    }
 435    tcg_swap_vecop_list(hold_list);
 436}
 437
 438void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
 439{
 440    const TCGOpcode *hold_list;
 441
 442    tcg_assert_listed_vecop(INDEX_op_neg_vec);
 443    hold_list = tcg_swap_vecop_list(NULL);
 444
 445    if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
 446        TCGv_vec t = tcg_const_zeros_vec_matching(r);
 447        tcg_gen_sub_vec(vece, r, t, a);
 448        tcg_temp_free_vec(t);
 449    }
 450    tcg_swap_vecop_list(hold_list);
 451}
 452
 453void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
 454{
 455    const TCGOpcode *hold_list;
 456
 457    tcg_assert_listed_vecop(INDEX_op_abs_vec);
 458    hold_list = tcg_swap_vecop_list(NULL);
 459
 460    if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
 461        TCGType type = tcgv_vec_temp(r)->base_type;
 462        TCGv_vec t = tcg_temp_new_vec(type);
 463
 464        tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
 465        if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
 466            tcg_gen_neg_vec(vece, t, a);
 467            tcg_gen_smax_vec(vece, r, a, t);
 468        } else {
 469            if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
 470                tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
 471            } else {
 472                tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a,
 473                                tcg_constant_vec(type, vece, 0));
 474            }
 475            tcg_gen_xor_vec(vece, r, a, t);
 476            tcg_gen_sub_vec(vece, r, r, t);
 477        }
 478
 479        tcg_temp_free_vec(t);
 480    }
 481    tcg_swap_vecop_list(hold_list);
 482}
 483
 484static void do_shifti(TCGOpcode opc, unsigned vece,
 485                      TCGv_vec r, TCGv_vec a, int64_t i)
 486{
 487    TCGTemp *rt = tcgv_vec_temp(r);
 488    TCGTemp *at = tcgv_vec_temp(a);
 489    TCGArg ri = temp_arg(rt);
 490    TCGArg ai = temp_arg(at);
 491    TCGType type = rt->base_type;
 492    int can;
 493
 494    tcg_debug_assert(at->base_type == type);
 495    tcg_debug_assert(i >= 0 && i < (8 << vece));
 496    tcg_assert_listed_vecop(opc);
 497
 498    if (i == 0) {
 499        tcg_gen_mov_vec(r, a);
 500        return;
 501    }
 502
 503    can = tcg_can_emit_vec_op(opc, type, vece);
 504    if (can > 0) {
 505        vec_gen_3(opc, type, vece, ri, ai, i);
 506    } else {
 507        /* We leave the choice of expansion via scalar or vector shift
 508           to the target.  Often, but not always, dupi can feed a vector
 509           shift easier than a scalar.  */
 510        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 511        tcg_debug_assert(can < 0);
 512        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
 513        tcg_swap_vecop_list(hold_list);
 514    }
 515}
 516
 517void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 518{
 519    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
 520}
 521
 522void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 523{
 524    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
 525}
 526
 527void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 528{
 529    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
 530}
 531
 532void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 533{
 534    do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
 535}
 536
 537void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 538{
 539    int bits = 8 << vece;
 540    tcg_debug_assert(i >= 0 && i < bits);
 541    do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
 542}
 543
 544void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
 545                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
 546{
 547    TCGTemp *rt = tcgv_vec_temp(r);
 548    TCGTemp *at = tcgv_vec_temp(a);
 549    TCGTemp *bt = tcgv_vec_temp(b);
 550    TCGArg ri = temp_arg(rt);
 551    TCGArg ai = temp_arg(at);
 552    TCGArg bi = temp_arg(bt);
 553    TCGType type = rt->base_type;
 554    int can;
 555
 556    tcg_debug_assert(at->base_type >= type);
 557    tcg_debug_assert(bt->base_type >= type);
 558    tcg_assert_listed_vecop(INDEX_op_cmp_vec);
 559    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
 560    if (can > 0) {
 561        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
 562    } else {
 563        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 564        tcg_debug_assert(can < 0);
 565        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
 566        tcg_swap_vecop_list(hold_list);
 567    }
 568}
 569
 570static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
 571                   TCGv_vec b, TCGOpcode opc)
 572{
 573    TCGTemp *rt = tcgv_vec_temp(r);
 574    TCGTemp *at = tcgv_vec_temp(a);
 575    TCGTemp *bt = tcgv_vec_temp(b);
 576    TCGArg ri = temp_arg(rt);
 577    TCGArg ai = temp_arg(at);
 578    TCGArg bi = temp_arg(bt);
 579    TCGType type = rt->base_type;
 580    int can;
 581
 582    tcg_debug_assert(at->base_type >= type);
 583    tcg_debug_assert(bt->base_type >= type);
 584    tcg_assert_listed_vecop(opc);
 585    can = tcg_can_emit_vec_op(opc, type, vece);
 586    if (can > 0) {
 587        vec_gen_3(opc, type, vece, ri, ai, bi);
 588    } else if (can < 0) {
 589        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 590        tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
 591        tcg_swap_vecop_list(hold_list);
 592    } else {
 593        return false;
 594    }
 595    return true;
 596}
 597
 598static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
 599                          TCGv_vec b, TCGOpcode opc)
 600{
 601    bool ok = do_op3(vece, r, a, b, opc);
 602    tcg_debug_assert(ok);
 603}
 604
 605void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 606{
 607    do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
 608}
 609
 610void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 611{
 612    do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
 613}
 614
 615void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 616{
 617    do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
 618}
 619
 620void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 621{
 622    do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
 623}
 624
 625void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 626{
 627    if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) {
 628        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 629        TCGv_vec t = tcg_temp_new_vec_matching(r);
 630
 631        /* usadd(a, b) = min(a, ~b) + b */
 632        tcg_gen_not_vec(vece, t, b);
 633        tcg_gen_umin_vec(vece, t, t, a);
 634        tcg_gen_add_vec(vece, r, t, b);
 635
 636        tcg_temp_free_vec(t);
 637        tcg_swap_vecop_list(hold_list);
 638    }
 639}
 640
 641void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 642{
 643    do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
 644}
 645
 646void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 647{
 648    if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) {
 649        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 650        TCGv_vec t = tcg_temp_new_vec_matching(r);
 651
 652        /* ussub(a, b) = max(a, b) - b */
 653        tcg_gen_umax_vec(vece, t, a, b);
 654        tcg_gen_sub_vec(vece, r, t, b);
 655
 656        tcg_temp_free_vec(t);
 657        tcg_swap_vecop_list(hold_list);
 658    }
 659}
 660
 661static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
 662                      TCGv_vec b, TCGOpcode opc, TCGCond cond)
 663{
 664    if (!do_op3(vece, r, a, b, opc)) {
 665        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 666        tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
 667        tcg_swap_vecop_list(hold_list);
 668    }
 669}
 670
 671void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 672{
 673    do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
 674}
 675
 676void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 677{
 678    do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
 679}
 680
 681void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 682{
 683    do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
 684}
 685
 686void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 687{
 688    do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
 689}
 690
 691void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 692{
 693    do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
 694}
 695
 696void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 697{
 698    do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
 699}
 700
 701void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 702{
 703    do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
 704}
 705
 706void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 707{
 708    do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
 709}
 710
 711void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 712{
 713    do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
 714}
 715
 716static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
 717                      TCGv_i32 s, TCGOpcode opc)
 718{
 719    TCGTemp *rt = tcgv_vec_temp(r);
 720    TCGTemp *at = tcgv_vec_temp(a);
 721    TCGTemp *st = tcgv_i32_temp(s);
 722    TCGArg ri = temp_arg(rt);
 723    TCGArg ai = temp_arg(at);
 724    TCGArg si = temp_arg(st);
 725    TCGType type = rt->base_type;
 726    int can;
 727
 728    tcg_debug_assert(at->base_type >= type);
 729    tcg_assert_listed_vecop(opc);
 730    can = tcg_can_emit_vec_op(opc, type, vece);
 731    if (can > 0) {
 732        vec_gen_3(opc, type, vece, ri, ai, si);
 733    } else if (can < 0) {
 734        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 735        tcg_expand_vec_op(opc, type, vece, ri, ai, si);
 736        tcg_swap_vecop_list(hold_list);
 737    } else {
 738        g_assert_not_reached();
 739    }
 740}
 741
 742void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
 743{
 744    do_shifts(vece, r, a, b, INDEX_op_shls_vec);
 745}
 746
 747void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
 748{
 749    do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
 750}
 751
 752void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
 753{
 754    do_shifts(vece, r, a, b, INDEX_op_sars_vec);
 755}
 756
 757void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
 758{
 759    do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
 760}
 761
 762void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
 763                        TCGv_vec b, TCGv_vec c)
 764{
 765    TCGTemp *rt = tcgv_vec_temp(r);
 766    TCGTemp *at = tcgv_vec_temp(a);
 767    TCGTemp *bt = tcgv_vec_temp(b);
 768    TCGTemp *ct = tcgv_vec_temp(c);
 769    TCGType type = rt->base_type;
 770
 771    tcg_debug_assert(at->base_type >= type);
 772    tcg_debug_assert(bt->base_type >= type);
 773    tcg_debug_assert(ct->base_type >= type);
 774
 775    if (TCG_TARGET_HAS_bitsel_vec) {
 776        vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
 777                  temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
 778    } else {
 779        TCGv_vec t = tcg_temp_new_vec(type);
 780        tcg_gen_and_vec(MO_8, t, a, b);
 781        tcg_gen_andc_vec(MO_8, r, c, a);
 782        tcg_gen_or_vec(MO_8, r, r, t);
 783        tcg_temp_free_vec(t);
 784    }
 785}
 786
 787void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
 788                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
 789{
 790    TCGTemp *rt = tcgv_vec_temp(r);
 791    TCGTemp *at = tcgv_vec_temp(a);
 792    TCGTemp *bt = tcgv_vec_temp(b);
 793    TCGTemp *ct = tcgv_vec_temp(c);
 794    TCGTemp *dt = tcgv_vec_temp(d);
 795    TCGArg ri = temp_arg(rt);
 796    TCGArg ai = temp_arg(at);
 797    TCGArg bi = temp_arg(bt);
 798    TCGArg ci = temp_arg(ct);
 799    TCGArg di = temp_arg(dt);
 800    TCGType type = rt->base_type;
 801    const TCGOpcode *hold_list;
 802    int can;
 803
 804    tcg_debug_assert(at->base_type >= type);
 805    tcg_debug_assert(bt->base_type >= type);
 806    tcg_debug_assert(ct->base_type >= type);
 807    tcg_debug_assert(dt->base_type >= type);
 808
 809    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
 810    hold_list = tcg_swap_vecop_list(NULL);
 811    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);
 812
 813    if (can > 0) {
 814        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
 815    } else if (can < 0) {
 816        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
 817                          ri, ai, bi, ci, di, cond);
 818    } else {
 819        TCGv_vec t = tcg_temp_new_vec(type);
 820        tcg_gen_cmp_vec(cond, vece, t, a, b);
 821        tcg_gen_bitsel_vec(vece, r, t, c, d);
 822        tcg_temp_free_vec(t);
 823    }
 824    tcg_swap_vecop_list(hold_list);
 825}
 826