qemu/tcg/tcg-op-vec.c
<<
>>
Prefs
   1/*
   2 * Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2018 Linaro, Inc.
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "tcg/tcg.h"
  22#include "tcg/tcg-temp-internal.h"
  23#include "tcg/tcg-op-common.h"
  24#include "tcg/tcg-mo.h"
  25#include "tcg-internal.h"
  26
  27/*
  28 * Vector optional opcode tracking.
  29 * Except for the basic logical operations (and, or, xor), and
  30 * data movement (mov, ld, st, dupi), many vector opcodes are
  31 * optional and may not be supported on the host.  Thank Intel
  32 * for the irregularity in their instruction set.
  33 *
  34 * The gvec expanders allow custom vector operations to be composed,
  35 * generally via the .fniv callback in the GVecGen* structures.  At
  36 * the same time, in deciding whether to use this hook we need to
  37 * know if the host supports the required operations.  This is
  38 * presented as an array of opcodes, terminated by 0.  Each opcode
  39 * is assumed to be expanded with the given VECE.
  40 *
  41 * For debugging, we want to validate this array.  Therefore, when
   42 * tcg_ctx->vecop_list is non-NULL, the tcg_gen_*_vec expanders
  43 * will validate that their opcode is present in the list.
  44 */
  45static void tcg_assert_listed_vecop(TCGOpcode op)
  46{
  47#ifdef CONFIG_DEBUG_TCG
  48    const TCGOpcode *p = tcg_ctx->vecop_list;
  49    if (p) {
  50        for (; *p; ++p) {
  51            if (*p == op) {
  52                return;
  53            }
  54        }
  55        g_assert_not_reached();
  56    }
  57#endif
  58}
  59
  60bool tcg_can_emit_vecop_list(const TCGOpcode *list,
  61                             TCGType type, unsigned vece)
  62{
  63    if (list == NULL) {
  64        return true;
  65    }
  66
  67    for (; *list; ++list) {
  68        TCGOpcode opc = *list;
  69
  70#ifdef CONFIG_DEBUG_TCG
  71        switch (opc) {
  72        case INDEX_op_and_vec:
  73        case INDEX_op_or_vec:
  74        case INDEX_op_xor_vec:
  75        case INDEX_op_mov_vec:
  76        case INDEX_op_dup_vec:
  77        case INDEX_op_dup2_vec:
  78        case INDEX_op_ld_vec:
  79        case INDEX_op_st_vec:
  80        case INDEX_op_bitsel_vec:
  81            /* These opcodes are mandatory and should not be listed.  */
  82            g_assert_not_reached();
  83        case INDEX_op_not_vec:
  84            /* These opcodes have generic expansions using the above.  */
  85            g_assert_not_reached();
  86        default:
  87            break;
  88        }
  89#endif
  90
  91        if (tcg_can_emit_vec_op(opc, type, vece)) {
  92            continue;
  93        }
  94
  95        /*
  96         * The opcode list is created by front ends based on what they
  97         * actually invoke.  We must mirror the logic in the routines
  98         * below for generic expansions using other opcodes.
  99         */
 100        switch (opc) {
 101        case INDEX_op_neg_vec:
 102            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)) {
 103                continue;
 104            }
 105            break;
 106        case INDEX_op_abs_vec:
 107            if (tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece)
 108                && (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0
 109                    || tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0
 110                    || tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece))) {
 111                continue;
 112            }
 113            break;
 114        case INDEX_op_usadd_vec:
 115            if (tcg_can_emit_vec_op(INDEX_op_umin_vec, type, vece) ||
 116                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
 117                continue;
 118            }
 119            break;
 120        case INDEX_op_ussub_vec:
 121            if (tcg_can_emit_vec_op(INDEX_op_umax_vec, type, vece) ||
 122                tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
 123                continue;
 124            }
 125            break;
 126        case INDEX_op_cmpsel_vec:
 127        case INDEX_op_smin_vec:
 128        case INDEX_op_smax_vec:
 129        case INDEX_op_umin_vec:
 130        case INDEX_op_umax_vec:
 131            if (tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece)) {
 132                continue;
 133            }
 134            break;
 135        default:
 136            break;
 137        }
 138        return false;
 139    }
 140    return true;
 141}
 142
 143void vec_gen_2(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r, TCGArg a)
 144{
 145    TCGOp *op = tcg_emit_op(opc, 2);
 146    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 147    TCGOP_VECE(op) = vece;
 148    op->args[0] = r;
 149    op->args[1] = a;
 150}
 151
 152void vec_gen_3(TCGOpcode opc, TCGType type, unsigned vece,
 153               TCGArg r, TCGArg a, TCGArg b)
 154{
 155    TCGOp *op = tcg_emit_op(opc, 3);
 156    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 157    TCGOP_VECE(op) = vece;
 158    op->args[0] = r;
 159    op->args[1] = a;
 160    op->args[2] = b;
 161}
 162
 163void vec_gen_4(TCGOpcode opc, TCGType type, unsigned vece,
 164               TCGArg r, TCGArg a, TCGArg b, TCGArg c)
 165{
 166    TCGOp *op = tcg_emit_op(opc, 4);
 167    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 168    TCGOP_VECE(op) = vece;
 169    op->args[0] = r;
 170    op->args[1] = a;
 171    op->args[2] = b;
 172    op->args[3] = c;
 173}
 174
 175static void vec_gen_6(TCGOpcode opc, TCGType type, unsigned vece, TCGArg r,
 176                      TCGArg a, TCGArg b, TCGArg c, TCGArg d, TCGArg e)
 177{
 178    TCGOp *op = tcg_emit_op(opc, 6);
 179    TCGOP_VECL(op) = type - TCG_TYPE_V64;
 180    TCGOP_VECE(op) = vece;
 181    op->args[0] = r;
 182    op->args[1] = a;
 183    op->args[2] = b;
 184    op->args[3] = c;
 185    op->args[4] = d;
 186    op->args[5] = e;
 187}
 188
 189static void vec_gen_op2(TCGOpcode opc, unsigned vece, TCGv_vec r, TCGv_vec a)
 190{
 191    TCGTemp *rt = tcgv_vec_temp(r);
 192    TCGTemp *at = tcgv_vec_temp(a);
 193    TCGType type = rt->base_type;
 194
 195    /* Must enough inputs for the output.  */
 196    tcg_debug_assert(at->base_type >= type);
 197    vec_gen_2(opc, type, vece, temp_arg(rt), temp_arg(at));
 198}
 199
 200static void vec_gen_op3(TCGOpcode opc, unsigned vece,
 201                        TCGv_vec r, TCGv_vec a, TCGv_vec b)
 202{
 203    TCGTemp *rt = tcgv_vec_temp(r);
 204    TCGTemp *at = tcgv_vec_temp(a);
 205    TCGTemp *bt = tcgv_vec_temp(b);
 206    TCGType type = rt->base_type;
 207
 208    /* Must enough inputs for the output.  */
 209    tcg_debug_assert(at->base_type >= type);
 210    tcg_debug_assert(bt->base_type >= type);
 211    vec_gen_3(opc, type, vece, temp_arg(rt), temp_arg(at), temp_arg(bt));
 212}
 213
 214void tcg_gen_mov_vec(TCGv_vec r, TCGv_vec a)
 215{
 216    if (r != a) {
 217        vec_gen_op2(INDEX_op_mov_vec, 0, r, a);
 218    }
 219}
 220
 221void tcg_gen_dupi_vec(unsigned vece, TCGv_vec r, uint64_t a)
 222{
 223    TCGTemp *rt = tcgv_vec_temp(r);
 224    tcg_gen_mov_vec(r, tcg_constant_vec(rt->base_type, vece, a));
 225}
 226
 227void tcg_gen_dup_i64_vec(unsigned vece, TCGv_vec r, TCGv_i64 a)
 228{
 229    TCGArg ri = tcgv_vec_arg(r);
 230    TCGTemp *rt = arg_temp(ri);
 231    TCGType type = rt->base_type;
 232
 233    if (TCG_TARGET_REG_BITS == 64) {
 234        TCGArg ai = tcgv_i64_arg(a);
 235        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
 236    } else if (vece == MO_64) {
 237        TCGArg al = tcgv_i32_arg(TCGV_LOW(a));
 238        TCGArg ah = tcgv_i32_arg(TCGV_HIGH(a));
 239        vec_gen_3(INDEX_op_dup2_vec, type, MO_64, ri, al, ah);
 240    } else {
 241        TCGArg ai = tcgv_i32_arg(TCGV_LOW(a));
 242        vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
 243    }
 244}
 245
 246void tcg_gen_dup_i32_vec(unsigned vece, TCGv_vec r, TCGv_i32 a)
 247{
 248    TCGArg ri = tcgv_vec_arg(r);
 249    TCGArg ai = tcgv_i32_arg(a);
 250    TCGTemp *rt = arg_temp(ri);
 251    TCGType type = rt->base_type;
 252
 253    vec_gen_2(INDEX_op_dup_vec, type, vece, ri, ai);
 254}
 255
 256void tcg_gen_dup_mem_vec(unsigned vece, TCGv_vec r, TCGv_ptr b,
 257                         tcg_target_long ofs)
 258{
 259    TCGArg ri = tcgv_vec_arg(r);
 260    TCGArg bi = tcgv_ptr_arg(b);
 261    TCGTemp *rt = arg_temp(ri);
 262    TCGType type = rt->base_type;
 263
 264    vec_gen_3(INDEX_op_dupm_vec, type, vece, ri, bi, ofs);
 265}
 266
 267static void vec_gen_ldst(TCGOpcode opc, TCGv_vec r, TCGv_ptr b, TCGArg o)
 268{
 269    TCGArg ri = tcgv_vec_arg(r);
 270    TCGArg bi = tcgv_ptr_arg(b);
 271    TCGTemp *rt = arg_temp(ri);
 272    TCGType type = rt->base_type;
 273
 274    vec_gen_3(opc, type, 0, ri, bi, o);
 275}
 276
 277void tcg_gen_ld_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
 278{
 279    vec_gen_ldst(INDEX_op_ld_vec, r, b, o);
 280}
 281
 282void tcg_gen_st_vec(TCGv_vec r, TCGv_ptr b, TCGArg o)
 283{
 284    vec_gen_ldst(INDEX_op_st_vec, r, b, o);
 285}
 286
 287void tcg_gen_stl_vec(TCGv_vec r, TCGv_ptr b, TCGArg o, TCGType low_type)
 288{
 289    TCGArg ri = tcgv_vec_arg(r);
 290    TCGArg bi = tcgv_ptr_arg(b);
 291    TCGTemp *rt = arg_temp(ri);
 292    TCGType type = rt->base_type;
 293
 294    tcg_debug_assert(low_type >= TCG_TYPE_V64);
 295    tcg_debug_assert(low_type <= type);
 296    vec_gen_3(INDEX_op_st_vec, low_type, 0, ri, bi, o);
 297}
 298
 299void tcg_gen_and_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 300{
 301    vec_gen_op3(INDEX_op_and_vec, 0, r, a, b);
 302}
 303
 304void tcg_gen_or_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 305{
 306    vec_gen_op3(INDEX_op_or_vec, 0, r, a, b);
 307}
 308
 309void tcg_gen_xor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 310{
 311    vec_gen_op3(INDEX_op_xor_vec, 0, r, a, b);
 312}
 313
 314void tcg_gen_andc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 315{
 316    if (TCG_TARGET_HAS_andc_vec) {
 317        vec_gen_op3(INDEX_op_andc_vec, 0, r, a, b);
 318    } else {
 319        TCGv_vec t = tcg_temp_new_vec_matching(r);
 320        tcg_gen_not_vec(0, t, b);
 321        tcg_gen_and_vec(0, r, a, t);
 322        tcg_temp_free_vec(t);
 323    }
 324}
 325
 326void tcg_gen_orc_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 327{
 328    if (TCG_TARGET_HAS_orc_vec) {
 329        vec_gen_op3(INDEX_op_orc_vec, 0, r, a, b);
 330    } else {
 331        TCGv_vec t = tcg_temp_new_vec_matching(r);
 332        tcg_gen_not_vec(0, t, b);
 333        tcg_gen_or_vec(0, r, a, t);
 334        tcg_temp_free_vec(t);
 335    }
 336}
 337
 338void tcg_gen_nand_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 339{
 340    if (TCG_TARGET_HAS_nand_vec) {
 341        vec_gen_op3(INDEX_op_nand_vec, 0, r, a, b);
 342    } else {
 343        tcg_gen_and_vec(0, r, a, b);
 344        tcg_gen_not_vec(0, r, r);
 345    }
 346}
 347
 348void tcg_gen_nor_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 349{
 350    if (TCG_TARGET_HAS_nor_vec) {
 351        vec_gen_op3(INDEX_op_nor_vec, 0, r, a, b);
 352    } else {
 353        tcg_gen_or_vec(0, r, a, b);
 354        tcg_gen_not_vec(0, r, r);
 355    }
 356}
 357
 358void tcg_gen_eqv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 359{
 360    if (TCG_TARGET_HAS_eqv_vec) {
 361        vec_gen_op3(INDEX_op_eqv_vec, 0, r, a, b);
 362    } else {
 363        tcg_gen_xor_vec(0, r, a, b);
 364        tcg_gen_not_vec(0, r, r);
 365    }
 366}
 367
 368static bool do_op2(unsigned vece, TCGv_vec r, TCGv_vec a, TCGOpcode opc)
 369{
 370    TCGTemp *rt = tcgv_vec_temp(r);
 371    TCGTemp *at = tcgv_vec_temp(a);
 372    TCGArg ri = temp_arg(rt);
 373    TCGArg ai = temp_arg(at);
 374    TCGType type = rt->base_type;
 375    int can;
 376
 377    tcg_debug_assert(at->base_type >= type);
 378    tcg_assert_listed_vecop(opc);
 379    can = tcg_can_emit_vec_op(opc, type, vece);
 380    if (can > 0) {
 381        vec_gen_2(opc, type, vece, ri, ai);
 382    } else if (can < 0) {
 383        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 384        tcg_expand_vec_op(opc, type, vece, ri, ai);
 385        tcg_swap_vecop_list(hold_list);
 386    } else {
 387        return false;
 388    }
 389    return true;
 390}
 391
 392void tcg_gen_not_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
 393{
 394    const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 395
 396    if (!TCG_TARGET_HAS_not_vec || !do_op2(vece, r, a, INDEX_op_not_vec)) {
 397        tcg_gen_xor_vec(0, r, a, tcg_constant_vec_matching(r, 0, -1));
 398    }
 399    tcg_swap_vecop_list(hold_list);
 400}
 401
 402void tcg_gen_neg_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
 403{
 404    const TCGOpcode *hold_list;
 405
 406    tcg_assert_listed_vecop(INDEX_op_neg_vec);
 407    hold_list = tcg_swap_vecop_list(NULL);
 408
 409    if (!TCG_TARGET_HAS_neg_vec || !do_op2(vece, r, a, INDEX_op_neg_vec)) {
 410        tcg_gen_sub_vec(vece, r, tcg_constant_vec_matching(r, vece, 0), a);
 411    }
 412    tcg_swap_vecop_list(hold_list);
 413}
 414
 415void tcg_gen_abs_vec(unsigned vece, TCGv_vec r, TCGv_vec a)
 416{
 417    const TCGOpcode *hold_list;
 418
 419    tcg_assert_listed_vecop(INDEX_op_abs_vec);
 420    hold_list = tcg_swap_vecop_list(NULL);
 421
 422    if (!do_op2(vece, r, a, INDEX_op_abs_vec)) {
 423        TCGType type = tcgv_vec_temp(r)->base_type;
 424        TCGv_vec t = tcg_temp_new_vec(type);
 425
 426        tcg_debug_assert(tcg_can_emit_vec_op(INDEX_op_sub_vec, type, vece));
 427        if (tcg_can_emit_vec_op(INDEX_op_smax_vec, type, vece) > 0) {
 428            tcg_gen_neg_vec(vece, t, a);
 429            tcg_gen_smax_vec(vece, r, a, t);
 430        } else {
 431            if (tcg_can_emit_vec_op(INDEX_op_sari_vec, type, vece) > 0) {
 432                tcg_gen_sari_vec(vece, t, a, (8 << vece) - 1);
 433            } else {
 434                tcg_gen_cmp_vec(TCG_COND_LT, vece, t, a,
 435                                tcg_constant_vec(type, vece, 0));
 436            }
 437            tcg_gen_xor_vec(vece, r, a, t);
 438            tcg_gen_sub_vec(vece, r, r, t);
 439        }
 440
 441        tcg_temp_free_vec(t);
 442    }
 443    tcg_swap_vecop_list(hold_list);
 444}
 445
 446static void do_shifti(TCGOpcode opc, unsigned vece,
 447                      TCGv_vec r, TCGv_vec a, int64_t i)
 448{
 449    TCGTemp *rt = tcgv_vec_temp(r);
 450    TCGTemp *at = tcgv_vec_temp(a);
 451    TCGArg ri = temp_arg(rt);
 452    TCGArg ai = temp_arg(at);
 453    TCGType type = rt->base_type;
 454    int can;
 455
 456    tcg_debug_assert(at->base_type == type);
 457    tcg_debug_assert(i >= 0 && i < (8 << vece));
 458    tcg_assert_listed_vecop(opc);
 459
 460    if (i == 0) {
 461        tcg_gen_mov_vec(r, a);
 462        return;
 463    }
 464
 465    can = tcg_can_emit_vec_op(opc, type, vece);
 466    if (can > 0) {
 467        vec_gen_3(opc, type, vece, ri, ai, i);
 468    } else {
 469        /* We leave the choice of expansion via scalar or vector shift
 470           to the target.  Often, but not always, dupi can feed a vector
 471           shift easier than a scalar.  */
 472        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 473        tcg_debug_assert(can < 0);
 474        tcg_expand_vec_op(opc, type, vece, ri, ai, i);
 475        tcg_swap_vecop_list(hold_list);
 476    }
 477}
 478
 479void tcg_gen_shli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 480{
 481    do_shifti(INDEX_op_shli_vec, vece, r, a, i);
 482}
 483
 484void tcg_gen_shri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 485{
 486    do_shifti(INDEX_op_shri_vec, vece, r, a, i);
 487}
 488
 489void tcg_gen_sari_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 490{
 491    do_shifti(INDEX_op_sari_vec, vece, r, a, i);
 492}
 493
 494void tcg_gen_rotli_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 495{
 496    do_shifti(INDEX_op_rotli_vec, vece, r, a, i);
 497}
 498
 499void tcg_gen_rotri_vec(unsigned vece, TCGv_vec r, TCGv_vec a, int64_t i)
 500{
 501    int bits = 8 << vece;
 502    tcg_debug_assert(i >= 0 && i < bits);
 503    do_shifti(INDEX_op_rotli_vec, vece, r, a, -i & (bits - 1));
 504}
 505
 506void tcg_gen_cmp_vec(TCGCond cond, unsigned vece,
 507                     TCGv_vec r, TCGv_vec a, TCGv_vec b)
 508{
 509    TCGTemp *rt = tcgv_vec_temp(r);
 510    TCGTemp *at = tcgv_vec_temp(a);
 511    TCGTemp *bt = tcgv_vec_temp(b);
 512    TCGArg ri = temp_arg(rt);
 513    TCGArg ai = temp_arg(at);
 514    TCGArg bi = temp_arg(bt);
 515    TCGType type = rt->base_type;
 516    int can;
 517
 518    tcg_debug_assert(at->base_type >= type);
 519    tcg_debug_assert(bt->base_type >= type);
 520    tcg_assert_listed_vecop(INDEX_op_cmp_vec);
 521    can = tcg_can_emit_vec_op(INDEX_op_cmp_vec, type, vece);
 522    if (can > 0) {
 523        vec_gen_4(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
 524    } else {
 525        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 526        tcg_debug_assert(can < 0);
 527        tcg_expand_vec_op(INDEX_op_cmp_vec, type, vece, ri, ai, bi, cond);
 528        tcg_swap_vecop_list(hold_list);
 529    }
 530}
 531
 532static bool do_op3(unsigned vece, TCGv_vec r, TCGv_vec a,
 533                   TCGv_vec b, TCGOpcode opc)
 534{
 535    TCGTemp *rt = tcgv_vec_temp(r);
 536    TCGTemp *at = tcgv_vec_temp(a);
 537    TCGTemp *bt = tcgv_vec_temp(b);
 538    TCGArg ri = temp_arg(rt);
 539    TCGArg ai = temp_arg(at);
 540    TCGArg bi = temp_arg(bt);
 541    TCGType type = rt->base_type;
 542    int can;
 543
 544    tcg_debug_assert(at->base_type >= type);
 545    tcg_debug_assert(bt->base_type >= type);
 546    tcg_assert_listed_vecop(opc);
 547    can = tcg_can_emit_vec_op(opc, type, vece);
 548    if (can > 0) {
 549        vec_gen_3(opc, type, vece, ri, ai, bi);
 550    } else if (can < 0) {
 551        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 552        tcg_expand_vec_op(opc, type, vece, ri, ai, bi);
 553        tcg_swap_vecop_list(hold_list);
 554    } else {
 555        return false;
 556    }
 557    return true;
 558}
 559
 560static void do_op3_nofail(unsigned vece, TCGv_vec r, TCGv_vec a,
 561                          TCGv_vec b, TCGOpcode opc)
 562{
 563    bool ok = do_op3(vece, r, a, b, opc);
 564    tcg_debug_assert(ok);
 565}
 566
 567void tcg_gen_add_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 568{
 569    do_op3_nofail(vece, r, a, b, INDEX_op_add_vec);
 570}
 571
 572void tcg_gen_sub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 573{
 574    do_op3_nofail(vece, r, a, b, INDEX_op_sub_vec);
 575}
 576
 577void tcg_gen_mul_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 578{
 579    do_op3_nofail(vece, r, a, b, INDEX_op_mul_vec);
 580}
 581
 582void tcg_gen_ssadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 583{
 584    do_op3_nofail(vece, r, a, b, INDEX_op_ssadd_vec);
 585}
 586
 587void tcg_gen_usadd_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 588{
 589    if (!do_op3(vece, r, a, b, INDEX_op_usadd_vec)) {
 590        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 591        TCGv_vec t = tcg_temp_new_vec_matching(r);
 592
 593        /* usadd(a, b) = min(a, ~b) + b */
 594        tcg_gen_not_vec(vece, t, b);
 595        tcg_gen_umin_vec(vece, t, t, a);
 596        tcg_gen_add_vec(vece, r, t, b);
 597
 598        tcg_temp_free_vec(t);
 599        tcg_swap_vecop_list(hold_list);
 600    }
 601}
 602
 603void tcg_gen_sssub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 604{
 605    do_op3_nofail(vece, r, a, b, INDEX_op_sssub_vec);
 606}
 607
 608void tcg_gen_ussub_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 609{
 610    if (!do_op3(vece, r, a, b, INDEX_op_ussub_vec)) {
 611        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 612        TCGv_vec t = tcg_temp_new_vec_matching(r);
 613
 614        /* ussub(a, b) = max(a, b) - b */
 615        tcg_gen_umax_vec(vece, t, a, b);
 616        tcg_gen_sub_vec(vece, r, t, b);
 617
 618        tcg_temp_free_vec(t);
 619        tcg_swap_vecop_list(hold_list);
 620    }
 621}
 622
 623static void do_minmax(unsigned vece, TCGv_vec r, TCGv_vec a,
 624                      TCGv_vec b, TCGOpcode opc, TCGCond cond)
 625{
 626    if (!do_op3(vece, r, a, b, opc)) {
 627        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 628        tcg_gen_cmpsel_vec(cond, vece, r, a, b, a, b);
 629        tcg_swap_vecop_list(hold_list);
 630    }
 631}
 632
 633void tcg_gen_smin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 634{
 635    do_minmax(vece, r, a, b, INDEX_op_smin_vec, TCG_COND_LT);
 636}
 637
 638void tcg_gen_umin_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 639{
 640    do_minmax(vece, r, a, b, INDEX_op_umin_vec, TCG_COND_LTU);
 641}
 642
 643void tcg_gen_smax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 644{
 645    do_minmax(vece, r, a, b, INDEX_op_smax_vec, TCG_COND_GT);
 646}
 647
 648void tcg_gen_umax_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 649{
 650    do_minmax(vece, r, a, b, INDEX_op_umax_vec, TCG_COND_GTU);
 651}
 652
 653void tcg_gen_shlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 654{
 655    do_op3_nofail(vece, r, a, b, INDEX_op_shlv_vec);
 656}
 657
 658void tcg_gen_shrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 659{
 660    do_op3_nofail(vece, r, a, b, INDEX_op_shrv_vec);
 661}
 662
 663void tcg_gen_sarv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 664{
 665    do_op3_nofail(vece, r, a, b, INDEX_op_sarv_vec);
 666}
 667
 668void tcg_gen_rotlv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 669{
 670    do_op3_nofail(vece, r, a, b, INDEX_op_rotlv_vec);
 671}
 672
 673void tcg_gen_rotrv_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_vec b)
 674{
 675    do_op3_nofail(vece, r, a, b, INDEX_op_rotrv_vec);
 676}
 677
 678static void do_shifts(unsigned vece, TCGv_vec r, TCGv_vec a,
 679                      TCGv_i32 s, TCGOpcode opc)
 680{
 681    TCGTemp *rt = tcgv_vec_temp(r);
 682    TCGTemp *at = tcgv_vec_temp(a);
 683    TCGTemp *st = tcgv_i32_temp(s);
 684    TCGArg ri = temp_arg(rt);
 685    TCGArg ai = temp_arg(at);
 686    TCGArg si = temp_arg(st);
 687    TCGType type = rt->base_type;
 688    int can;
 689
 690    tcg_debug_assert(at->base_type >= type);
 691    tcg_assert_listed_vecop(opc);
 692    can = tcg_can_emit_vec_op(opc, type, vece);
 693    if (can > 0) {
 694        vec_gen_3(opc, type, vece, ri, ai, si);
 695    } else if (can < 0) {
 696        const TCGOpcode *hold_list = tcg_swap_vecop_list(NULL);
 697        tcg_expand_vec_op(opc, type, vece, ri, ai, si);
 698        tcg_swap_vecop_list(hold_list);
 699    } else {
 700        g_assert_not_reached();
 701    }
 702}
 703
 704void tcg_gen_shls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
 705{
 706    do_shifts(vece, r, a, b, INDEX_op_shls_vec);
 707}
 708
 709void tcg_gen_shrs_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
 710{
 711    do_shifts(vece, r, a, b, INDEX_op_shrs_vec);
 712}
 713
 714void tcg_gen_sars_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 b)
 715{
 716    do_shifts(vece, r, a, b, INDEX_op_sars_vec);
 717}
 718
 719void tcg_gen_rotls_vec(unsigned vece, TCGv_vec r, TCGv_vec a, TCGv_i32 s)
 720{
 721    do_shifts(vece, r, a, s, INDEX_op_rotls_vec);
 722}
 723
 724void tcg_gen_bitsel_vec(unsigned vece, TCGv_vec r, TCGv_vec a,
 725                        TCGv_vec b, TCGv_vec c)
 726{
 727    TCGTemp *rt = tcgv_vec_temp(r);
 728    TCGTemp *at = tcgv_vec_temp(a);
 729    TCGTemp *bt = tcgv_vec_temp(b);
 730    TCGTemp *ct = tcgv_vec_temp(c);
 731    TCGType type = rt->base_type;
 732
 733    tcg_debug_assert(at->base_type >= type);
 734    tcg_debug_assert(bt->base_type >= type);
 735    tcg_debug_assert(ct->base_type >= type);
 736
 737    if (TCG_TARGET_HAS_bitsel_vec) {
 738        vec_gen_4(INDEX_op_bitsel_vec, type, MO_8,
 739                  temp_arg(rt), temp_arg(at), temp_arg(bt), temp_arg(ct));
 740    } else {
 741        TCGv_vec t = tcg_temp_new_vec(type);
 742        tcg_gen_and_vec(MO_8, t, a, b);
 743        tcg_gen_andc_vec(MO_8, r, c, a);
 744        tcg_gen_or_vec(MO_8, r, r, t);
 745        tcg_temp_free_vec(t);
 746    }
 747}
 748
 749void tcg_gen_cmpsel_vec(TCGCond cond, unsigned vece, TCGv_vec r,
 750                        TCGv_vec a, TCGv_vec b, TCGv_vec c, TCGv_vec d)
 751{
 752    TCGTemp *rt = tcgv_vec_temp(r);
 753    TCGTemp *at = tcgv_vec_temp(a);
 754    TCGTemp *bt = tcgv_vec_temp(b);
 755    TCGTemp *ct = tcgv_vec_temp(c);
 756    TCGTemp *dt = tcgv_vec_temp(d);
 757    TCGArg ri = temp_arg(rt);
 758    TCGArg ai = temp_arg(at);
 759    TCGArg bi = temp_arg(bt);
 760    TCGArg ci = temp_arg(ct);
 761    TCGArg di = temp_arg(dt);
 762    TCGType type = rt->base_type;
 763    const TCGOpcode *hold_list;
 764    int can;
 765
 766    tcg_debug_assert(at->base_type >= type);
 767    tcg_debug_assert(bt->base_type >= type);
 768    tcg_debug_assert(ct->base_type >= type);
 769    tcg_debug_assert(dt->base_type >= type);
 770
 771    tcg_assert_listed_vecop(INDEX_op_cmpsel_vec);
 772    hold_list = tcg_swap_vecop_list(NULL);
 773    can = tcg_can_emit_vec_op(INDEX_op_cmpsel_vec, type, vece);
 774
 775    if (can > 0) {
 776        vec_gen_6(INDEX_op_cmpsel_vec, type, vece, ri, ai, bi, ci, di, cond);
 777    } else if (can < 0) {
 778        tcg_expand_vec_op(INDEX_op_cmpsel_vec, type, vece,
 779                          ri, ai, bi, ci, di, cond);
 780    } else {
 781        TCGv_vec t = tcg_temp_new_vec(type);
 782        tcg_gen_cmp_vec(cond, vece, t, a, b);
 783        tcg_gen_bitsel_vec(vece, r, t, c, d);
 784        tcg_temp_free_vec(t);
 785    }
 786    tcg_swap_vecop_list(hold_list);
 787}
 788