/* qemu/tcg/tcg-op-vec.c */
   1/*
   2 * Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2018 Linaro, Inc.
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "tcg/tcg.h"
  22#include "tcg/tcg-op.h"
  23#include "tcg/tcg-mo.h"
  24
  25/* Reduce the number of ifdefs below.  This assumes that all uses of
  26   TCGV_HIGH and TCGV_LOW are properly protected by a conditional that
  27   the compiler can eliminate.  */
  28#if TCG_TARGET_REG_BITS == 64
  29extern TCGv_i32 TCGV_LOW_link_error(TCGv_i64);
  30extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
  31#define TCGV_LOW  TCGV_LOW_link_error
  32#define TCGV_HIGH TCGV_HIGH_link_error
  33#endif
  34
  35/*
  36 * Vector optional opcode tracking.
  37 * Except for the basic logical operations (and, or, xor), and
  38 * data movement (mov, ld, st, dupi), many vector opcodes are
  39 * optional and may not be supported on the host.  Thank Intel
  40 * for the irregularity in their instruction set.
  41 *
  42 * The gvec expanders allow custom vector operations to be composed,
  43 * generally via the .fniv callback in the GVecGen* structures.  At
  44 * the same time, in deciding whether to use this hook we need to
  45 * know if the host supports the required operations.  This is
  46 * presented as an array of opcodes, terminated by 0.  Each opcode
  47 * is assumed to be expanded with the given VECE.
  48 *
  49 * For debugging, we want to validate this array.  Therefore, when
 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
  51 * will validate that their opcode is present in the list.
  52 */
  53#ifdef CONFIG_DEBUG_TCG
  54void tcg_assert_listed_vecop(TCGOpcode op)
  55{
  56    const TCGOpcode *p = tcg_ctx->vecop_list;
  57    if (p) {
  58        for (; *p; ++p) {
  59            if (*p == op) {
  60                return;
  61            }
  62        }
  63        g_assert_not_reached();
  64    }
  65}
  66#endif
  67
/*
 * Return true if every opcode in LIST can be emitted for (TYPE, VECE),
 * either directly by the host backend or via one of the generic
 * expansions implemented by the tcg_gen_*_vec routines below.
 * A NULL list imposes no constraint and trivially succeeds.
 */
bool tcg_can_emit_vecop_list(const TCGOpcode *list,
                             TCGType type, unsigned vece)
{
    if (list == NULL) {
        return true;
    }

    for (; *list; ++list) {
        TCGOpcode opc = *list;

#ifdef CONFIG_DEBUG_TCG
        switch (opc) {
        case INDEX_op_and_vec:
        case INDEX_op_or_vec:
        case INDEX_op_xor_vec:
        case INDEX_op_mov_vec:
        case INDEX_op_dup_vec:
        case INDEX_op_dup2_vec:
        case INDEX_op_ld_vec:
        case INDEX_op_st_vec:
        case INDEX_op_bitsel_vec:
            /* These opcodes are mandatory and should not be listed.  */
            g_assert_not_reached();
        case INDEX_op_not_vec:
            /* These opcodes have generic expansions using the above.  */
            g_assert_not_reached();
        default:
            break;
        }
#endif

        /* Directly supported by the host?  */
        if (tcg_can_emit_vec_op(opc, type, vece)) {
            continue;
        }

        /*
         * The opcode list is created by front ends based on what they
         * actually invoke.  We must mirror the logic in the routines
         * below for generic expansions using other opcodes.
         */
        switch (opc) {
        case INDEX_op_neg_vec:
            /* Expandable as 0 - a; see tcg_gen_neg_vec.  */
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_abs_vec:
            /* Expandable via smax, sari or cmp; see tcg_gen_abs_vec.  */
            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
                continue;
            }
            break;
        case INDEX_op_usadd_vec:
            /* Expandable as min(a, ~b) + b; see tcg_gen_usadd_vec.  */
            if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) ||
                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_ussub_vec:
            /* Expandable as max(a, b) - b; see tcg_gen_ussub_vec.  */
            if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) ||
                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        case INDEX_op_cmpsel_vec:
        case INDEX_op_smin_vec:
        case INDEX_op_smax_vec:
        case INDEX_op_umin_vec:
        case INDEX_op_umax_vec:
            /* All of these can be implemented in terms of cmp.  */
            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
                continue;
            }
            break;
        default:
            break;
        }
        return false;
    }
    return true;
}
 150
 151void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
 152{
 153    TCGOp *op = tcg_emit_op(opc);
 154    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 155    TCGOP_VECE(op) = vece;
 156    op->args[0] = r;
 157    op->args[1] = a;
 158}
 159
 160void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
 161               TCGArg r, TCGArg a, TCGArg b)
 162{
 163    TCGOp *op = tcg_emit_op(opc);
 164    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 165    TCGOP_VECE(op) = vece;
 166    op->args[0] = r;
 167    op->args[1] = a;
 168    op->args[2] = b;
 169}
 170
 171void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
 172               TCGArg r, TCGArg a, TCGArg b, TCGArg c)
 173{
 174    TCGOp *op = tcg_emit_op(opc);
 175    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 176    TCGOP_VECE(op) = vece;
 177    op->args[0] = r;
 178    op->args[1] = a;
 179    op->args[2] = b;
 180    op->args[3] = c;
 181}
 182
 183static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
 184                      TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
 185{
 186    TCGOp *op = tcg_emit_op(opc);
 187    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 188    TCGOP_VECE(op) = vece;
 189    op->args[0] = r;
 190    op->args[1] = a;
 191    op->args[2] = b;
 192    op->args[3] = c;
 193    op->args[4] = d;
 194    op->args[5] = e;
 195}
 196
/* Emit OPC on vector temps R = op(A), using R's base type.  */
static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
}
 207
/* Emit OPC on vector temps R = op(A, B), using R's base type.  */
static void vec_gen_op3(TCGOpcode opc, unsigned vece,
                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGType type = rt->base_type;

    /* Must have enough inputs for the output.  */
    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
}
 221
 222void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
 223{
 224    if (r != a) {
 225        vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
 226    }
 227}
 228
 229TCGv_vec tcg_const_zeros_vec(TCGType type)
 230{
 231    TCGv_vec ret = tcg_temp_new_vec(type);
 232    tcg_gen_dupi_vec(MO_64, ret, 0);
 233    return ret;
 234}
 235
 236TCGv_vec tcg_const_ones_vec(TCGType type)
 237{
 238    TCGv_vec ret = tcg_temp_new_vec(type);
 239    tcg_gen_dupi_vec(MO_64, ret, -1);
 240    return ret;
 241}
 242
 243TCGv_vec tcg_const_zeros_vec_matching(TCGv_vec m)
 244{
 245    TCGTemp *t = tcgv_vec_temp(m);
 246    return tcg_const_zeros_vec(t->base_type);
 247}
 248
 249TCGv_vec tcg_const_ones_vec_matching(TCGv_vec m)
 250{
 251    TCGTemp *t = tcgv_vec_temp(m);
 252    return tcg_const_ones_vec(t->base_type);
 253}
 254
 255void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
 256{
 257    TCGTemp *rt = tcgv_vec_temp(r);
 258    tcg_gen_mov_vec(r, tcg_constant_vec(rt->base_type, vece, a));
 259}
 260
/* Duplicate the i64 value A into every VECE-sized element of R.  */
void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
{
    TCGArg ri = tcgv_vec_arg(r);
    TCGTemp *rt = arg_temp(ri);
    TCGType type = rt->base_type;

    if (TCG_TARGET_REG_BITS == 64) {
        /* A fits in a single host register: plain dup.  */
        TCGArg ai = tcgv_i64_arg(a);
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    } else if (vece == MO_64) {
        /* 32-bit host, 64-bit elements: combine the two i32 halves.  */
        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
    } else {
        /* 32-bit host, sub-64-bit elements: the low half suffices.  */
        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
    }
}
 279
 280void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
 281{
 282    TCGArg ri = tcgv_vec_arg(r);
 283    TCGArg ai = tcgv_i32_arg(a);
 284    TCGTemp *rt = arg_temp(ri);
 285    TCGType type = rt->base_type;
 286
 287    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
 288}
 289
 290void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
 291                         tcg_target_long ofs)
 292{
 293    TCGArg ri = tcgv_vec_arg(r);
 294    TCGArg bi = tcgv_ptr_arg(b);
 295    TCGTemp *rt = arg_temp(ri);
 296    TCGType type = rt->base_type;
 297
 298    vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
 299}
 300
 301static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
 302{
 303    TCGArg ri = tcgv_vec_arg(r);
 304    TCGArg bi = tcgv_ptr_arg(b);
 305    TCGTemp *rt = arg_temp(ri);
 306    TCGType type = rt->base_type;
 307
 308    vec_gen_3(opc, type, 0, ri, bi, o);
 309}
 310
/* Load vector R from memory at B + O.  */
void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
}

/* Store vector R to memory at B + O.  */
void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
{
    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
}
 320
 321void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
 322{
 323    TCGArg ri = tcgv_vec_arg(r);
 324    TCGArg bi = tcgv_ptr_arg(b);
 325    TCGTemp *rt = arg_temp(ri);
 326    TCGType type = rt->base_type;
 327
 328    tcg_debug_assert(low_type >= TCG_TYPE_V64);
 329    tcg_debug_assert(low_type <= type);
 330    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
 331}
 332
/* r = a & b.  Bitwise ops ignore vece; 0 is passed as a placeholder.  */
void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
}

/* r = a | b.  */
void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
}

/* r = a ^ b.  */
void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
}
 347
 348void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 349{
 350    if (TCG_TARGET_HAS_andc_vec) {
 351        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
 352    } else {
 353        TCGv_vec t = tcg_temp_new_vec_matching(r);
 354        tcg_gen_not_vec(0, t, b);
 355        tcg_gen_and_vec(0, r, a, t);
 356        tcg_temp_free_vec(t);
 357    }
 358}
 359
 360void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 361{
 362    if (TCG_TARGET_HAS_orc_vec) {
 363        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
 364    } else {
 365        TCGv_vec t = tcg_temp_new_vec_matching(r);
 366        tcg_gen_not_vec(0, t, b);
 367        tcg_gen_or_vec(0, r, a, t);
 368        tcg_temp_free_vec(t);
 369    }
 370}
 371
/* r = ~(a & b).  */
void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nand_vec when adding a backend supports it. */
    tcg_gen_and_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

/* r = ~(a | b).  */
void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_nor_vec when adding a backend supports it. */
    tcg_gen_or_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}

/* r = ~(a ^ b).  */
void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    /* TODO: Add TCG_TARGET_HAS_eqv_vec when adding a backend supports it. */
    tcg_gen_xor_vec(0, r, a, b);
    tcg_gen_not_vec(0, r, r);
}
 392
/*
 * Emit the unary vector opcode OPC: natively if the host supports it,
 * or via the target's tcg_expand_vec_op hook.  Return false if neither
 * is possible, in which case the caller must expand generically.
 */
static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_2(opc, type, vece, ri, ai);
    } else if (can < 0) {
        /* Suspend list checking around the target-specific expansion.  */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}
 416
/* r = ~a, with generic fallback r = a ^ -1.  */
void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
        TCGv_vec t = tcg_const_ones_vec_matching(r);
        tcg_gen_xor_vec(0, r, a, t);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}
 428
/* Per-element negation, with generic fallback r = 0 - a.  */
void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_neg_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
        TCGv_vec t = tcg_const_zeros_vec_matching(r);
        tcg_gen_sub_vec(vece, r, t, a);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}
 443
/* Per-element absolute value, with two generic fallbacks.  */
void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
{
    const TCGOpcode *hold_list;

    tcg_assert_listed_vecop(INDEX_op_abs_vec);
    hold_list = tcg_swap_vecop_list(NULL);

    if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
        TCGType type = tcgv_vec_temp(r)->base_type;
        TCGv_vec t = tcg_temp_new_vec(type);

        /* Both fallbacks below require subtraction.  */
        tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
        if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
            /* abs(a) = smax(a, -a).  */
            tcg_gen_neg_vec(vece, t, a);
            tcg_gen_smax_vec(vece, r, a, t);
        } else {
            /* Build the sign mask t, then abs(a) = (a ^ t) - t.  */
            if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
                tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
            } else {
                tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a,
                                tcg_constant_vec(type, vece, 0));
            }
            tcg_gen_xor_vec(vece, r, a, t);
            tcg_gen_sub_vec(vece, r, r, t);
        }

        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}
 474
/*
 * Emit the immediate shift/rotate OPC of A by I into R.  A zero count
 * is a plain move; otherwise emit natively when possible, else defer
 * to the target's expander (which must exist for these opcodes).
 */
static void do_shifti(TCGOpcode opc, unsigned vece,
                      TCGv_vec r, TCGv_vec a, int64_t i)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type == type);
    /* The count must be within the element width.  */
    tcg_debug_assert(i >= 0 && i < (8 << vece));
    tcg_assert_listed_vecop(opc);

    if (i == 0) {
        tcg_gen_mov_vec(r, a);
        return;
    }

    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, i);
    } else {
        /* We leave the choice of expansion via scalar or vector shift
           to the target.  Often, but not always, dupi can feed a vector
           shift easier than a scalar.  */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
        tcg_swap_vecop_list(hold_list);
    }
}
 507
/* Shift each VECE-sized element of A left by the immediate I.  */
void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
}

/* Shift each element right by the immediate I (logical form).  */
void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
}

/* Shift each element right by the immediate I (arithmetic form).  */
void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
}

/* Rotate each element left by the immediate I.  */
void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
}

/* Rotate right by I, implemented as rotate left by (bits - I) mod bits.  */
void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
{
    int bits = 8 << vece;
    tcg_debug_assert(i >= 0 && i < bits);
    do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
}
 534
/*
 * Emit an element-wise comparison of A and B under COND into R,
 * natively if possible, otherwise via the target's expander
 * (which must exist; there is no generic fallback here).
 */
void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(INDEX_op_cmp_vec);
    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
    if (can > 0) {
        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
    } else {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_debug_assert(can < 0);
        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
        tcg_swap_vecop_list(hold_list);
    }
}
 560
/*
 * Emit the binary vector opcode OPC: natively if the host supports it,
 * or via the target's tcg_expand_vec_op hook.  Return false if neither
 * is possible, in which case the caller must expand generically.
 */
static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
                   TCGv_vec b, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, bi);
    } else if (can < 0) {
        /* Suspend list checking around the target-specific expansion.  */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
        tcg_swap_vecop_list(hold_list);
    } else {
        return false;
    }
    return true;
}
 588
 589static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
 590                          TCGv_vec b, TCGOpcode opc)
 591{
 592    bool ok = do_op3(vece, r, a, b, opc);
 593    tcg_debug_assert(ok);
 594}
 595
/* r = a + b, per VECE-sized element.  */
void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
}

/* r = a - b, per VECE-sized element.  */
void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
}

/* r = a * b, per VECE-sized element.  */
void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
}

/* Signed saturating addition, per element; no generic fallback.  */
void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
}
 615
/* Unsigned saturating addition, per element.  */
void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        TCGv_vec t = tcg_temp_new_vec_matching(r);

        /* usadd(a, b) = min(a, ~b) + b */
        tcg_gen_not_vec(vece, t, b);
        tcg_gen_umin_vec(vece, t, t, a);
        tcg_gen_add_vec(vece, r, t, b);

        tcg_temp_free_vec(t);
        tcg_swap_vecop_list(hold_list);
    }
}
 631
/* Signed saturating subtraction, per element; no generic fallback.  */
void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
}
 636
/* Unsigned saturating subtraction, per element.  */
void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) {
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        TCGv_vec t = tcg_temp_new_vec_matching(r);

        /* ussub(a, b) = max(a, b) - b */
        tcg_gen_umax_vec(vece, t, a, b);
        tcg_gen_sub_vec(vece, r, t, b);

        tcg_temp_free_vec(t);
        tcg_swap_vecop_list(hold_list);
    }
}
 651
 652static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
 653                      TCGv_vec b, TCGOpcode opc, TCGCond cond)
 654{
 655    if (!do_op3(vece, r, a, b, opc)) {
 656        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 657        tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
 658        tcg_swap_vecop_list(hold_list);
 659    }
 660}
 661
/* Signed minimum, per element.  */
void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
}

/* Unsigned minimum, per element.  */
void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
}

/* Signed maximum, per element.  */
void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
}

/* Unsigned maximum, per element.  */
void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
}
 681
/* Per-element left shift, with shift counts taken from vector B.  */
void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
}

/* Per-element right shift (logical form), counts from vector B.  */
void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
}

/* Per-element right shift (arithmetic form), counts from vector B.  */
void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
}

/* Per-element left rotate, counts from vector B.  */
void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
}

/* Per-element right rotate, counts from vector B.  */
void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
{
    do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
}
 706
/*
 * Emit the vector-by-scalar shift OPC: all elements of A shifted by
 * the i32 value S.  The opcode must be supported natively or by the
 * target expander; there is no generic fallback.
 */
static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
                      TCGv_i32 s, TCGOpcode opc)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *st = tcgv_i32_temp(s);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg si = temp_arg(st);
    TCGType type = rt->base_type;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_assert_listed_vecop(opc);
    can = tcg_can_emit_vec_op(opc, type, vece);
    if (can > 0) {
        vec_gen_3(opc, type, vece, ri, ai, si);
    } else if (can < 0) {
        /* Suspend list checking around the target-specific expansion.  */
        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
        tcg_expand_vec_op(opc, type, vece, ri, ai, si);
        tcg_swap_vecop_list(hold_list);
    } else {
        g_assert_not_reached();
    }
}
 732
/* Shift all elements of A left by the scalar i32 count B.  */
void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shls_vec);
}

/* Shift all elements right by the scalar count B (logical form).  */
void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
}

/* Shift all elements right by the scalar count B (arithmetic form).  */
void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
{
    do_shifts(vece, r, a, b, INDEX_op_sars_vec);
}

/* Rotate all elements left by the scalar count S.  */
void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
{
    do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
}
 752
/*
 * Bitwise select: r = (a & b) | (~a & c), i.e. for each bit take B
 * where A is set and C where it is clear (see the fallback below).
 */
void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
                        TCGv_vec b, TCGv_vec c)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGType type = rt->base_type;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);

    if (TCG_TARGET_HAS_bitsel_vec) {
        vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
                  temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
    } else {
        /* t = a & b; r = c & ~a; r |= t.  A separate temp is needed
           because r may alias any of the inputs.  */
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_and_vec(MO_8, t, a, b);
        tcg_gen_andc_vec(MO_8, r, c, a);
        tcg_gen_or_vec(MO_8, r, r, t);
        tcg_temp_free_vec(t);
    }
}
 777
/*
 * Emit r = (a COND b ? c : d) per element: natively if possible,
 * via the target expander if provided, and otherwise generically
 * as cmp followed by bitsel on the comparison result.
 */
void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
{
    TCGTemp *rt = tcgv_vec_temp(r);
    TCGTemp *at = tcgv_vec_temp(a);
    TCGTemp *bt = tcgv_vec_temp(b);
    TCGTemp *ct = tcgv_vec_temp(c);
    TCGTemp *dt = tcgv_vec_temp(d);
    TCGArg ri = temp_arg(rt);
    TCGArg ai = temp_arg(at);
    TCGArg bi = temp_arg(bt);
    TCGArg ci = temp_arg(ct);
    TCGArg di = temp_arg(dt);
    TCGType type = rt->base_type;
    const TCGOpcode *hold_list;
    int can;

    tcg_debug_assert(at->base_type >= type);
    tcg_debug_assert(bt->base_type >= type);
    tcg_debug_assert(ct->base_type >= type);
    tcg_debug_assert(dt->base_type >= type);

    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
    hold_list = tcg_swap_vecop_list(NULL);
    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);

    if (can > 0) {
        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
    } else if (can < 0) {
        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
                          ri, ai, bi, ci, di, cond);
    } else {
        /* Generic fallback: compute the mask, then select with it.  */
        TCGv_vec t = tcg_temp_new_vec(type);
        tcg_gen_cmp_vec(cond, vece, t, a, b);
        tcg_gen_bitsel_vec(vece, r, t, c, d);
        tcg_temp_free_vec(t);
    }
    tcg_swap_vecop_list(hold_list);
}
 817