qemu/accel/tcg/tcg-runtime-gvec.c
<<
>>
Prefs
   1/*
   2 * Generic vectorized operation runtime
   3 *
   4 * Copyright (c) 2018 Linaro
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu/host-utils.h"
  22#include "cpu.h"
  23#include "exec/helper-proto-common.h"
  24#include "tcg/tcg-gvec-desc.h"
  25
  26
  27static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
  28{
  29    intptr_t maxsz = simd_maxsz(desc);
  30    intptr_t i;
  31
  32    if (unlikely(maxsz > oprsz)) {
  33        for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) {
  34            *(uint64_t *)(d + i) = 0;
  35        }
  36    }
  37}
  38
  39void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
  40{
  41    intptr_t oprsz = simd_oprsz(desc);
  42    intptr_t i;
  43
  44    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
  45        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
  46    }
  47    clear_high(d, oprsz, desc);
  48}
  49
  50void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
  51{
  52    intptr_t oprsz = simd_oprsz(desc);
  53    intptr_t i;
  54
  55    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
  56        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
  57    }
  58    clear_high(d, oprsz, desc);
  59}
  60
  61void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
  62{
  63    intptr_t oprsz = simd_oprsz(desc);
  64    intptr_t i;
  65
  66    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
  67        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
  68    }
  69    clear_high(d, oprsz, desc);
  70}
  71
  72void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
  73{
  74    intptr_t oprsz = simd_oprsz(desc);
  75    intptr_t i;
  76
  77    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
  78        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
  79    }
  80    clear_high(d, oprsz, desc);
  81}
  82
  83void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
  84{
  85    intptr_t oprsz = simd_oprsz(desc);
  86    intptr_t i;
  87
  88    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
  89        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b;
  90    }
  91    clear_high(d, oprsz, desc);
  92}
  93
  94void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
  95{
  96    intptr_t oprsz = simd_oprsz(desc);
  97    intptr_t i;
  98
  99    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 100        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b;
 101    }
 102    clear_high(d, oprsz, desc);
 103}
 104
 105void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
 106{
 107    intptr_t oprsz = simd_oprsz(desc);
 108    intptr_t i;
 109
 110    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 111        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b;
 112    }
 113    clear_high(d, oprsz, desc);
 114}
 115
 116void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
 117{
 118    intptr_t oprsz = simd_oprsz(desc);
 119    intptr_t i;
 120
 121    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 122        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b;
 123    }
 124    clear_high(d, oprsz, desc);
 125}
 126
 127void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
 128{
 129    intptr_t oprsz = simd_oprsz(desc);
 130    intptr_t i;
 131
 132    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 133        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
 134    }
 135    clear_high(d, oprsz, desc);
 136}
 137
 138void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
 139{
 140    intptr_t oprsz = simd_oprsz(desc);
 141    intptr_t i;
 142
 143    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 144        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
 145    }
 146    clear_high(d, oprsz, desc);
 147}
 148
 149void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
 150{
 151    intptr_t oprsz = simd_oprsz(desc);
 152    intptr_t i;
 153
 154    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 155        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
 156    }
 157    clear_high(d, oprsz, desc);
 158}
 159
 160void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
 161{
 162    intptr_t oprsz = simd_oprsz(desc);
 163    intptr_t i;
 164
 165    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 166        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
 167    }
 168    clear_high(d, oprsz, desc);
 169}
 170
 171void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
 172{
 173    intptr_t oprsz = simd_oprsz(desc);
 174    intptr_t i;
 175
 176    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 177        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b;
 178    }
 179    clear_high(d, oprsz, desc);
 180}
 181
 182void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
 183{
 184    intptr_t oprsz = simd_oprsz(desc);
 185    intptr_t i;
 186
 187    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 188        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b;
 189    }
 190    clear_high(d, oprsz, desc);
 191}
 192
 193void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
 194{
 195    intptr_t oprsz = simd_oprsz(desc);
 196    intptr_t i;
 197
 198    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 199        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b;
 200    }
 201    clear_high(d, oprsz, desc);
 202}
 203
 204void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
 205{
 206    intptr_t oprsz = simd_oprsz(desc);
 207    intptr_t i;
 208
 209    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 210        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b;
 211    }
 212    clear_high(d, oprsz, desc);
 213}
 214
 215void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
 216{
 217    intptr_t oprsz = simd_oprsz(desc);
 218    intptr_t i;
 219
 220    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 221        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
 222    }
 223    clear_high(d, oprsz, desc);
 224}
 225
 226void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
 227{
 228    intptr_t oprsz = simd_oprsz(desc);
 229    intptr_t i;
 230
 231    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 232        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
 233    }
 234    clear_high(d, oprsz, desc);
 235}
 236
 237void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
 238{
 239    intptr_t oprsz = simd_oprsz(desc);
 240    intptr_t i;
 241
 242    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 243        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
 244    }
 245    clear_high(d, oprsz, desc);
 246}
 247
 248void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
 249{
 250    intptr_t oprsz = simd_oprsz(desc);
 251    intptr_t i;
 252
 253    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 254        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
 255    }
 256    clear_high(d, oprsz, desc);
 257}
 258
 259void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
 260{
 261    intptr_t oprsz = simd_oprsz(desc);
 262    intptr_t i;
 263
 264    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 265        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b;
 266    }
 267    clear_high(d, oprsz, desc);
 268}
 269
 270void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
 271{
 272    intptr_t oprsz = simd_oprsz(desc);
 273    intptr_t i;
 274
 275    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 276        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b;
 277    }
 278    clear_high(d, oprsz, desc);
 279}
 280
 281void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
 282{
 283    intptr_t oprsz = simd_oprsz(desc);
 284    intptr_t i;
 285
 286    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 287        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b;
 288    }
 289    clear_high(d, oprsz, desc);
 290}
 291
 292void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
 293{
 294    intptr_t oprsz = simd_oprsz(desc);
 295    intptr_t i;
 296
 297    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 298        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b;
 299    }
 300    clear_high(d, oprsz, desc);
 301}
 302
 303void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
 304{
 305    intptr_t oprsz = simd_oprsz(desc);
 306    intptr_t i;
 307
 308    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 309        *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
 310    }
 311    clear_high(d, oprsz, desc);
 312}
 313
 314void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
 315{
 316    intptr_t oprsz = simd_oprsz(desc);
 317    intptr_t i;
 318
 319    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 320        *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
 321    }
 322    clear_high(d, oprsz, desc);
 323}
 324
 325void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
 326{
 327    intptr_t oprsz = simd_oprsz(desc);
 328    intptr_t i;
 329
 330    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 331        *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
 332    }
 333    clear_high(d, oprsz, desc);
 334}
 335
 336void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
 337{
 338    intptr_t oprsz = simd_oprsz(desc);
 339    intptr_t i;
 340
 341    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 342        *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
 343    }
 344    clear_high(d, oprsz, desc);
 345}
 346
 347void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
 348{
 349    intptr_t oprsz = simd_oprsz(desc);
 350    intptr_t i;
 351
 352    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
 353        int8_t aa = *(int8_t *)(a + i);
 354        *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
 355    }
 356    clear_high(d, oprsz, desc);
 357}
 358
 359void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
 360{
 361    intptr_t oprsz = simd_oprsz(desc);
 362    intptr_t i;
 363
 364    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
 365        int16_t aa = *(int16_t *)(a + i);
 366        *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
 367    }
 368    clear_high(d, oprsz, desc);
 369}
 370
 371void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
 372{
 373    intptr_t oprsz = simd_oprsz(desc);
 374    intptr_t i;
 375
 376    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
 377        int32_t aa = *(int32_t *)(a + i);
 378        *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
 379    }
 380    clear_high(d, oprsz, desc);
 381}
 382
 383void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
 384{
 385    intptr_t oprsz = simd_oprsz(desc);
 386    intptr_t i;
 387
 388    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
 389        int64_t aa = *(int64_t *)(a + i);
 390        *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
 391    }
 392    clear_high(d, oprsz, desc);
 393}
 394
 395void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
 396{
 397    intptr_t oprsz = simd_oprsz(desc);
 398
 399    memcpy(d, a, oprsz);
 400    clear_high(d, oprsz, desc);
 401}
 402
 403void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
 404{
 405    intptr_t oprsz = simd_oprsz(desc);
 406    intptr_t i;
 407
 408    if (c == 0) {
 409        oprsz = 0;
 410    } else {
 411        for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 412            *(uint64_t *)(d + i) = c;
 413        }
 414    }
 415    clear_high(d, oprsz, desc);
 416}
 417
 418void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
 419{
 420    intptr_t oprsz = simd_oprsz(desc);
 421    intptr_t i;
 422
 423    if (c == 0) {
 424        oprsz = 0;
 425    } else {
 426        for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 427            *(uint32_t *)(d + i) = c;
 428        }
 429    }
 430    clear_high(d, oprsz, desc);
 431}
 432
 433void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
 434{
 435    HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
 436}
 437
 438void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
 439{
 440    HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
 441}
 442
 443void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
 444{
 445    intptr_t oprsz = simd_oprsz(desc);
 446    intptr_t i;
 447
 448    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 449        *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
 450    }
 451    clear_high(d, oprsz, desc);
 452}
 453
 454void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
 455{
 456    intptr_t oprsz = simd_oprsz(desc);
 457    intptr_t i;
 458
 459    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 460        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
 461    }
 462    clear_high(d, oprsz, desc);
 463}
 464
 465void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
 466{
 467    intptr_t oprsz = simd_oprsz(desc);
 468    intptr_t i;
 469
 470    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 471        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
 472    }
 473    clear_high(d, oprsz, desc);
 474}
 475
 476void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
 477{
 478    intptr_t oprsz = simd_oprsz(desc);
 479    intptr_t i;
 480
 481    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 482        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
 483    }
 484    clear_high(d, oprsz, desc);
 485}
 486
 487void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
 488{
 489    intptr_t oprsz = simd_oprsz(desc);
 490    intptr_t i;
 491
 492    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 493        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
 494    }
 495    clear_high(d, oprsz, desc);
 496}
 497
 498void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
 499{
 500    intptr_t oprsz = simd_oprsz(desc);
 501    intptr_t i;
 502
 503    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 504        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
 505    }
 506    clear_high(d, oprsz, desc);
 507}
 508
 509void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
 510{
 511    intptr_t oprsz = simd_oprsz(desc);
 512    intptr_t i;
 513
 514    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 515        *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
 516    }
 517    clear_high(d, oprsz, desc);
 518}
 519
 520void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
 521{
 522    intptr_t oprsz = simd_oprsz(desc);
 523    intptr_t i;
 524
 525    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 526        *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
 527    }
 528    clear_high(d, oprsz, desc);
 529}
 530
 531void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
 532{
 533    intptr_t oprsz = simd_oprsz(desc);
 534    intptr_t i;
 535
 536    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 537        *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
 538    }
 539    clear_high(d, oprsz, desc);
 540}
 541
 542void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
 543{
 544    intptr_t oprsz = simd_oprsz(desc);
 545    intptr_t i;
 546
 547    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 548        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b;
 549    }
 550    clear_high(d, oprsz, desc);
 551}
 552
 553void HELPER(gvec_andcs)(void *d, void *a, uint64_t b, uint32_t desc)
 554{
 555    intptr_t oprsz = simd_oprsz(desc);
 556    intptr_t i;
 557
 558    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 559        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & ~b;
 560    }
 561    clear_high(d, oprsz, desc);
 562}
 563
 564void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
 565{
 566    intptr_t oprsz = simd_oprsz(desc);
 567    intptr_t i;
 568
 569    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 570        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b;
 571    }
 572    clear_high(d, oprsz, desc);
 573}
 574
 575void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
 576{
 577    intptr_t oprsz = simd_oprsz(desc);
 578    intptr_t i;
 579
 580    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 581        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b;
 582    }
 583    clear_high(d, oprsz, desc);
 584}
 585
 586void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
 587{
 588    intptr_t oprsz = simd_oprsz(desc);
 589    int shift = simd_data(desc);
 590    intptr_t i;
 591
 592    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 593        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
 594    }
 595    clear_high(d, oprsz, desc);
 596}
 597
 598void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
 599{
 600    intptr_t oprsz = simd_oprsz(desc);
 601    int shift = simd_data(desc);
 602    intptr_t i;
 603
 604    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 605        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
 606    }
 607    clear_high(d, oprsz, desc);
 608}
 609
 610void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
 611{
 612    intptr_t oprsz = simd_oprsz(desc);
 613    int shift = simd_data(desc);
 614    intptr_t i;
 615
 616    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 617        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
 618    }
 619    clear_high(d, oprsz, desc);
 620}
 621
 622void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
 623{
 624    intptr_t oprsz = simd_oprsz(desc);
 625    int shift = simd_data(desc);
 626    intptr_t i;
 627
 628    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 629        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
 630    }
 631    clear_high(d, oprsz, desc);
 632}
 633
 634void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
 635{
 636    intptr_t oprsz = simd_oprsz(desc);
 637    int shift = simd_data(desc);
 638    intptr_t i;
 639
 640    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 641        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
 642    }
 643    clear_high(d, oprsz, desc);
 644}
 645
 646void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
 647{
 648    intptr_t oprsz = simd_oprsz(desc);
 649    int shift = simd_data(desc);
 650    intptr_t i;
 651
 652    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 653        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
 654    }
 655    clear_high(d, oprsz, desc);
 656}
 657
 658void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
 659{
 660    intptr_t oprsz = simd_oprsz(desc);
 661    int shift = simd_data(desc);
 662    intptr_t i;
 663
 664    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 665        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
 666    }
 667    clear_high(d, oprsz, desc);
 668}
 669
 670void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
 671{
 672    intptr_t oprsz = simd_oprsz(desc);
 673    int shift = simd_data(desc);
 674    intptr_t i;
 675
 676    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 677        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
 678    }
 679    clear_high(d, oprsz, desc);
 680}
 681
 682void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
 683{
 684    intptr_t oprsz = simd_oprsz(desc);
 685    int shift = simd_data(desc);
 686    intptr_t i;
 687
 688    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 689        *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
 690    }
 691    clear_high(d, oprsz, desc);
 692}
 693
 694void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
 695{
 696    intptr_t oprsz = simd_oprsz(desc);
 697    int shift = simd_data(desc);
 698    intptr_t i;
 699
 700    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 701        *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
 702    }
 703    clear_high(d, oprsz, desc);
 704}
 705
 706void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
 707{
 708    intptr_t oprsz = simd_oprsz(desc);
 709    int shift = simd_data(desc);
 710    intptr_t i;
 711
 712    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 713        *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
 714    }
 715    clear_high(d, oprsz, desc);
 716}
 717
 718void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
 719{
 720    intptr_t oprsz = simd_oprsz(desc);
 721    int shift = simd_data(desc);
 722    intptr_t i;
 723
 724    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 725        *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
 726    }
 727    clear_high(d, oprsz, desc);
 728}
 729
 730void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
 731{
 732    intptr_t oprsz = simd_oprsz(desc);
 733    int shift = simd_data(desc);
 734    intptr_t i;
 735
 736    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 737        *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
 738    }
 739    clear_high(d, oprsz, desc);
 740}
 741
 742void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
 743{
 744    intptr_t oprsz = simd_oprsz(desc);
 745    int shift = simd_data(desc);
 746    intptr_t i;
 747
 748    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 749        *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
 750    }
 751    clear_high(d, oprsz, desc);
 752}
 753
 754void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
 755{
 756    intptr_t oprsz = simd_oprsz(desc);
 757    int shift = simd_data(desc);
 758    intptr_t i;
 759
 760    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 761        *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
 762    }
 763    clear_high(d, oprsz, desc);
 764}
 765
 766void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
 767{
 768    intptr_t oprsz = simd_oprsz(desc);
 769    int shift = simd_data(desc);
 770    intptr_t i;
 771
 772    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 773        *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
 774    }
 775    clear_high(d, oprsz, desc);
 776}
 777
 778void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
 779{
 780    intptr_t oprsz = simd_oprsz(desc);
 781    intptr_t i;
 782
 783    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 784        uint8_t sh = *(uint8_t *)(b + i) & 7;
 785        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
 786    }
 787    clear_high(d, oprsz, desc);
 788}
 789
 790void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
 791{
 792    intptr_t oprsz = simd_oprsz(desc);
 793    intptr_t i;
 794
 795    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 796        uint8_t sh = *(uint16_t *)(b + i) & 15;
 797        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
 798    }
 799    clear_high(d, oprsz, desc);
 800}
 801
 802void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
 803{
 804    intptr_t oprsz = simd_oprsz(desc);
 805    intptr_t i;
 806
 807    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 808        uint8_t sh = *(uint32_t *)(b + i) & 31;
 809        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
 810    }
 811    clear_high(d, oprsz, desc);
 812}
 813
 814void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
 815{
 816    intptr_t oprsz = simd_oprsz(desc);
 817    intptr_t i;
 818
 819    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 820        uint8_t sh = *(uint64_t *)(b + i) & 63;
 821        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
 822    }
 823    clear_high(d, oprsz, desc);
 824}
 825
 826void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
 827{
 828    intptr_t oprsz = simd_oprsz(desc);
 829    intptr_t i;
 830
 831    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 832        uint8_t sh = *(uint8_t *)(b + i) & 7;
 833        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
 834    }
 835    clear_high(d, oprsz, desc);
 836}
 837
 838void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
 839{
 840    intptr_t oprsz = simd_oprsz(desc);
 841    intptr_t i;
 842
 843    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 844        uint8_t sh = *(uint16_t *)(b + i) & 15;
 845        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
 846    }
 847    clear_high(d, oprsz, desc);
 848}
 849
 850void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
 851{
 852    intptr_t oprsz = simd_oprsz(desc);
 853    intptr_t i;
 854
 855    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 856        uint8_t sh = *(uint32_t *)(b + i) & 31;
 857        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
 858    }
 859    clear_high(d, oprsz, desc);
 860}
 861
 862void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
 863{
 864    intptr_t oprsz = simd_oprsz(desc);
 865    intptr_t i;
 866
 867    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 868        uint8_t sh = *(uint64_t *)(b + i) & 63;
 869        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
 870    }
 871    clear_high(d, oprsz, desc);
 872}
 873
 874void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
 875{
 876    intptr_t oprsz = simd_oprsz(desc);
 877    intptr_t i;
 878
 879    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
 880        uint8_t sh = *(uint8_t *)(b + i) & 7;
 881        *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
 882    }
 883    clear_high(d, oprsz, desc);
 884}
 885
 886void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
 887{
 888    intptr_t oprsz = simd_oprsz(desc);
 889    intptr_t i;
 890
 891    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
 892        uint8_t sh = *(uint16_t *)(b + i) & 15;
 893        *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
 894    }
 895    clear_high(d, oprsz, desc);
 896}
 897
 898void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
 899{
 900    intptr_t oprsz = simd_oprsz(desc);
 901    intptr_t i;
 902
 903    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
 904        uint8_t sh = *(uint32_t *)(b + i) & 31;
 905        *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
 906    }
 907    clear_high(d, oprsz, desc);
 908}
 909
 910void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
 911{
 912    intptr_t oprsz = simd_oprsz(desc);
 913    intptr_t i;
 914
 915    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
 916        uint8_t sh = *(uint64_t *)(b + i) & 63;
 917        *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
 918    }
 919    clear_high(d, oprsz, desc);
 920}
 921
 922void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
 923{
 924    intptr_t oprsz = simd_oprsz(desc);
 925    intptr_t i;
 926
 927    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 928        uint8_t sh = *(uint8_t *)(b + i) & 7;
 929        *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
 930    }
 931    clear_high(d, oprsz, desc);
 932}
 933
 934void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
 935{
 936    intptr_t oprsz = simd_oprsz(desc);
 937    intptr_t i;
 938
 939    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 940        uint8_t sh = *(uint16_t *)(b + i) & 15;
 941        *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
 942    }
 943    clear_high(d, oprsz, desc);
 944}
 945
 946void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
 947{
 948    intptr_t oprsz = simd_oprsz(desc);
 949    intptr_t i;
 950
 951    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 952        uint8_t sh = *(uint32_t *)(b + i) & 31;
 953        *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
 954    }
 955    clear_high(d, oprsz, desc);
 956}
 957
 958void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
 959{
 960    intptr_t oprsz = simd_oprsz(desc);
 961    intptr_t i;
 962
 963    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 964        uint8_t sh = *(uint64_t *)(b + i) & 63;
 965        *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
 966    }
 967    clear_high(d, oprsz, desc);
 968}
 969
 970void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
 971{
 972    intptr_t oprsz = simd_oprsz(desc);
 973    intptr_t i;
 974
 975    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 976        uint8_t sh = *(uint8_t *)(b + i) & 7;
 977        *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
 978    }
 979    clear_high(d, oprsz, desc);
 980}
 981
 982void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
 983{
 984    intptr_t oprsz = simd_oprsz(desc);
 985    intptr_t i;
 986
 987    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 988        uint8_t sh = *(uint16_t *)(b + i) & 15;
 989        *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
 990    }
 991    clear_high(d, oprsz, desc);
 992}
 993
 994void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
 995{
 996    intptr_t oprsz = simd_oprsz(desc);
 997    intptr_t i;
 998
 999    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1000        uint8_t sh = *(uint32_t *)(b + i) & 31;
1001        *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
1002    }
1003    clear_high(d, oprsz, desc);
1004}
1005
1006void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
1007{
1008    intptr_t oprsz = simd_oprsz(desc);
1009    intptr_t i;
1010
1011    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1012        uint8_t sh = *(uint64_t *)(b + i) & 63;
1013        *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
1014    }
1015    clear_high(d, oprsz, desc);
1016}
1017
1018#define DO_CMP1(NAME, TYPE, OP)                                            \
1019void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
1020{                                                                          \
1021    intptr_t oprsz = simd_oprsz(desc);                                     \
1022    intptr_t i;                                                            \
1023    for (i = 0; i < oprsz; i += sizeof(TYPE)) {                            \
1024        *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i));        \
1025    }                                                                      \
1026    clear_high(d, oprsz, desc);                                            \
1027}
1028
1029#define DO_CMP2(SZ) \
1030    DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==)    \
1031    DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=)    \
1032    DO_CMP1(gvec_lt##SZ, int##SZ##_t, <)      \
1033    DO_CMP1(gvec_le##SZ, int##SZ##_t, <=)     \
1034    DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <)    \
1035    DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
1036
1037DO_CMP2(8)
1038DO_CMP2(16)
1039DO_CMP2(32)
1040DO_CMP2(64)
1041
1042#undef DO_CMP1
1043#undef DO_CMP2
1044
1045void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
1046{
1047    intptr_t oprsz = simd_oprsz(desc);
1048    intptr_t i;
1049
1050    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1051        int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
1052        if (r > INT8_MAX) {
1053            r = INT8_MAX;
1054        } else if (r < INT8_MIN) {
1055            r = INT8_MIN;
1056        }
1057        *(int8_t *)(d + i) = r;
1058    }
1059    clear_high(d, oprsz, desc);
1060}
1061
1062void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
1063{
1064    intptr_t oprsz = simd_oprsz(desc);
1065    intptr_t i;
1066
1067    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1068        int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
1069        if (r > INT16_MAX) {
1070            r = INT16_MAX;
1071        } else if (r < INT16_MIN) {
1072            r = INT16_MIN;
1073        }
1074        *(int16_t *)(d + i) = r;
1075    }
1076    clear_high(d, oprsz, desc);
1077}
1078
1079void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
1080{
1081    intptr_t oprsz = simd_oprsz(desc);
1082    intptr_t i;
1083
1084    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1085        int32_t ai = *(int32_t *)(a + i);
1086        int32_t bi = *(int32_t *)(b + i);
1087        int32_t di;
1088        if (sadd32_overflow(ai, bi, &di)) {
1089            di = (di < 0 ? INT32_MAX : INT32_MIN);
1090        }
1091        *(int32_t *)(d + i) = di;
1092    }
1093    clear_high(d, oprsz, desc);
1094}
1095
1096void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
1097{
1098    intptr_t oprsz = simd_oprsz(desc);
1099    intptr_t i;
1100
1101    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1102        int64_t ai = *(int64_t *)(a + i);
1103        int64_t bi = *(int64_t *)(b + i);
1104        int64_t di;
1105        if (sadd64_overflow(ai, bi, &di)) {
1106            di = (di < 0 ? INT64_MAX : INT64_MIN);
1107        }
1108        *(int64_t *)(d + i) = di;
1109    }
1110    clear_high(d, oprsz, desc);
1111}
1112
1113void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
1114{
1115    intptr_t oprsz = simd_oprsz(desc);
1116    intptr_t i;
1117
1118    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1119        int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
1120        if (r > INT8_MAX) {
1121            r = INT8_MAX;
1122        } else if (r < INT8_MIN) {
1123            r = INT8_MIN;
1124        }
1125        *(uint8_t *)(d + i) = r;
1126    }
1127    clear_high(d, oprsz, desc);
1128}
1129
1130void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
1131{
1132    intptr_t oprsz = simd_oprsz(desc);
1133    intptr_t i;
1134
1135    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1136        int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
1137        if (r > INT16_MAX) {
1138            r = INT16_MAX;
1139        } else if (r < INT16_MIN) {
1140            r = INT16_MIN;
1141        }
1142        *(int16_t *)(d + i) = r;
1143    }
1144    clear_high(d, oprsz, desc);
1145}
1146
1147void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
1148{
1149    intptr_t oprsz = simd_oprsz(desc);
1150    intptr_t i;
1151
1152    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1153        int32_t ai = *(int32_t *)(a + i);
1154        int32_t bi = *(int32_t *)(b + i);
1155        int32_t di;
1156        if (ssub32_overflow(ai, bi, &di)) {
1157            di = (di < 0 ? INT32_MAX : INT32_MIN);
1158        }
1159        *(int32_t *)(d + i) = di;
1160    }
1161    clear_high(d, oprsz, desc);
1162}
1163
1164void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
1165{
1166    intptr_t oprsz = simd_oprsz(desc);
1167    intptr_t i;
1168
1169    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1170        int64_t ai = *(int64_t *)(a + i);
1171        int64_t bi = *(int64_t *)(b + i);
1172        int64_t di;
1173        if (ssub64_overflow(ai, bi, &di)) {
1174            di = (di < 0 ? INT64_MAX : INT64_MIN);
1175        }
1176        *(int64_t *)(d + i) = di;
1177    }
1178    clear_high(d, oprsz, desc);
1179}
1180
1181void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
1182{
1183    intptr_t oprsz = simd_oprsz(desc);
1184    intptr_t i;
1185
1186    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1187        unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
1188        if (r > UINT8_MAX) {
1189            r = UINT8_MAX;
1190        }
1191        *(uint8_t *)(d + i) = r;
1192    }
1193    clear_high(d, oprsz, desc);
1194}
1195
1196void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
1197{
1198    intptr_t oprsz = simd_oprsz(desc);
1199    intptr_t i;
1200
1201    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1202        unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
1203        if (r > UINT16_MAX) {
1204            r = UINT16_MAX;
1205        }
1206        *(uint16_t *)(d + i) = r;
1207    }
1208    clear_high(d, oprsz, desc);
1209}
1210
1211void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
1212{
1213    intptr_t oprsz = simd_oprsz(desc);
1214    intptr_t i;
1215
1216    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1217        uint32_t ai = *(uint32_t *)(a + i);
1218        uint32_t bi = *(uint32_t *)(b + i);
1219        uint32_t di;
1220        if (uadd32_overflow(ai, bi, &di)) {
1221            di = UINT32_MAX;
1222        }
1223        *(uint32_t *)(d + i) = di;
1224    }
1225    clear_high(d, oprsz, desc);
1226}
1227
1228void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
1229{
1230    intptr_t oprsz = simd_oprsz(desc);
1231    intptr_t i;
1232
1233    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1234        uint64_t ai = *(uint64_t *)(a + i);
1235        uint64_t bi = *(uint64_t *)(b + i);
1236        uint64_t di;
1237        if (uadd64_overflow(ai, bi, &di)) {
1238            di = UINT64_MAX;
1239        }
1240        *(uint64_t *)(d + i) = di;
1241    }
1242    clear_high(d, oprsz, desc);
1243}
1244
1245void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
1246{
1247    intptr_t oprsz = simd_oprsz(desc);
1248    intptr_t i;
1249
1250    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1251        int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
1252        if (r < 0) {
1253            r = 0;
1254        }
1255        *(uint8_t *)(d + i) = r;
1256    }
1257    clear_high(d, oprsz, desc);
1258}
1259
1260void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
1261{
1262    intptr_t oprsz = simd_oprsz(desc);
1263    intptr_t i;
1264
1265    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1266        int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
1267        if (r < 0) {
1268            r = 0;
1269        }
1270        *(uint16_t *)(d + i) = r;
1271    }
1272    clear_high(d, oprsz, desc);
1273}
1274
1275void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
1276{
1277    intptr_t oprsz = simd_oprsz(desc);
1278    intptr_t i;
1279
1280    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1281        uint32_t ai = *(uint32_t *)(a + i);
1282        uint32_t bi = *(uint32_t *)(b + i);
1283        uint32_t di;
1284        if (usub32_overflow(ai, bi, &di)) {
1285            di = 0;
1286        }
1287        *(uint32_t *)(d + i) = di;
1288    }
1289    clear_high(d, oprsz, desc);
1290}
1291
1292void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1293{
1294    intptr_t oprsz = simd_oprsz(desc);
1295    intptr_t i;
1296
1297    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1298        uint64_t ai = *(uint64_t *)(a + i);
1299        uint64_t bi = *(uint64_t *)(b + i);
1300        uint64_t di;
1301        if (usub64_overflow(ai, bi, &di)) {
1302            di = 0;
1303        }
1304        *(uint64_t *)(d + i) = di;
1305    }
1306    clear_high(d, oprsz, desc);
1307}
1308
1309void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1310{
1311    intptr_t oprsz = simd_oprsz(desc);
1312    intptr_t i;
1313
1314    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1315        int8_t aa = *(int8_t *)(a + i);
1316        int8_t bb = *(int8_t *)(b + i);
1317        int8_t dd = aa < bb ? aa : bb;
1318        *(int8_t *)(d + i) = dd;
1319    }
1320    clear_high(d, oprsz, desc);
1321}
1322
1323void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1324{
1325    intptr_t oprsz = simd_oprsz(desc);
1326    intptr_t i;
1327
1328    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1329        int16_t aa = *(int16_t *)(a + i);
1330        int16_t bb = *(int16_t *)(b + i);
1331        int16_t dd = aa < bb ? aa : bb;
1332        *(int16_t *)(d + i) = dd;
1333    }
1334    clear_high(d, oprsz, desc);
1335}
1336
1337void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1338{
1339    intptr_t oprsz = simd_oprsz(desc);
1340    intptr_t i;
1341
1342    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1343        int32_t aa = *(int32_t *)(a + i);
1344        int32_t bb = *(int32_t *)(b + i);
1345        int32_t dd = aa < bb ? aa : bb;
1346        *(int32_t *)(d + i) = dd;
1347    }
1348    clear_high(d, oprsz, desc);
1349}
1350
1351void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1352{
1353    intptr_t oprsz = simd_oprsz(desc);
1354    intptr_t i;
1355
1356    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1357        int64_t aa = *(int64_t *)(a + i);
1358        int64_t bb = *(int64_t *)(b + i);
1359        int64_t dd = aa < bb ? aa : bb;
1360        *(int64_t *)(d + i) = dd;
1361    }
1362    clear_high(d, oprsz, desc);
1363}
1364
1365void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1366{
1367    intptr_t oprsz = simd_oprsz(desc);
1368    intptr_t i;
1369
1370    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1371        int8_t aa = *(int8_t *)(a + i);
1372        int8_t bb = *(int8_t *)(b + i);
1373        int8_t dd = aa > bb ? aa : bb;
1374        *(int8_t *)(d + i) = dd;
1375    }
1376    clear_high(d, oprsz, desc);
1377}
1378
1379void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1380{
1381    intptr_t oprsz = simd_oprsz(desc);
1382    intptr_t i;
1383
1384    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1385        int16_t aa = *(int16_t *)(a + i);
1386        int16_t bb = *(int16_t *)(b + i);
1387        int16_t dd = aa > bb ? aa : bb;
1388        *(int16_t *)(d + i) = dd;
1389    }
1390    clear_high(d, oprsz, desc);
1391}
1392
1393void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1394{
1395    intptr_t oprsz = simd_oprsz(desc);
1396    intptr_t i;
1397
1398    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1399        int32_t aa = *(int32_t *)(a + i);
1400        int32_t bb = *(int32_t *)(b + i);
1401        int32_t dd = aa > bb ? aa : bb;
1402        *(int32_t *)(d + i) = dd;
1403    }
1404    clear_high(d, oprsz, desc);
1405}
1406
1407void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1408{
1409    intptr_t oprsz = simd_oprsz(desc);
1410    intptr_t i;
1411
1412    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1413        int64_t aa = *(int64_t *)(a + i);
1414        int64_t bb = *(int64_t *)(b + i);
1415        int64_t dd = aa > bb ? aa : bb;
1416        *(int64_t *)(d + i) = dd;
1417    }
1418    clear_high(d, oprsz, desc);
1419}
1420
1421void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1422{
1423    intptr_t oprsz = simd_oprsz(desc);
1424    intptr_t i;
1425
1426    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1427        uint8_t aa = *(uint8_t *)(a + i);
1428        uint8_t bb = *(uint8_t *)(b + i);
1429        uint8_t dd = aa < bb ? aa : bb;
1430        *(uint8_t *)(d + i) = dd;
1431    }
1432    clear_high(d, oprsz, desc);
1433}
1434
1435void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1436{
1437    intptr_t oprsz = simd_oprsz(desc);
1438    intptr_t i;
1439
1440    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1441        uint16_t aa = *(uint16_t *)(a + i);
1442        uint16_t bb = *(uint16_t *)(b + i);
1443        uint16_t dd = aa < bb ? aa : bb;
1444        *(uint16_t *)(d + i) = dd;
1445    }
1446    clear_high(d, oprsz, desc);
1447}
1448
1449void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1450{
1451    intptr_t oprsz = simd_oprsz(desc);
1452    intptr_t i;
1453
1454    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1455        uint32_t aa = *(uint32_t *)(a + i);
1456        uint32_t bb = *(uint32_t *)(b + i);
1457        uint32_t dd = aa < bb ? aa : bb;
1458        *(uint32_t *)(d + i) = dd;
1459    }
1460    clear_high(d, oprsz, desc);
1461}
1462
1463void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1464{
1465    intptr_t oprsz = simd_oprsz(desc);
1466    intptr_t i;
1467
1468    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1469        uint64_t aa = *(uint64_t *)(a + i);
1470        uint64_t bb = *(uint64_t *)(b + i);
1471        uint64_t dd = aa < bb ? aa : bb;
1472        *(uint64_t *)(d + i) = dd;
1473    }
1474    clear_high(d, oprsz, desc);
1475}
1476
1477void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1478{
1479    intptr_t oprsz = simd_oprsz(desc);
1480    intptr_t i;
1481
1482    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1483        uint8_t aa = *(uint8_t *)(a + i);
1484        uint8_t bb = *(uint8_t *)(b + i);
1485        uint8_t dd = aa > bb ? aa : bb;
1486        *(uint8_t *)(d + i) = dd;
1487    }
1488    clear_high(d, oprsz, desc);
1489}
1490
1491void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1492{
1493    intptr_t oprsz = simd_oprsz(desc);
1494    intptr_t i;
1495
1496    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1497        uint16_t aa = *(uint16_t *)(a + i);
1498        uint16_t bb = *(uint16_t *)(b + i);
1499        uint16_t dd = aa > bb ? aa : bb;
1500        *(uint16_t *)(d + i) = dd;
1501    }
1502    clear_high(d, oprsz, desc);
1503}
1504
1505void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1506{
1507    intptr_t oprsz = simd_oprsz(desc);
1508    intptr_t i;
1509
1510    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1511        uint32_t aa = *(uint32_t *)(a + i);
1512        uint32_t bb = *(uint32_t *)(b + i);
1513        uint32_t dd = aa > bb ? aa : bb;
1514        *(uint32_t *)(d + i) = dd;
1515    }
1516    clear_high(d, oprsz, desc);
1517}
1518
1519void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1520{
1521    intptr_t oprsz = simd_oprsz(desc);
1522    intptr_t i;
1523
1524    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1525        uint64_t aa = *(uint64_t *)(a + i);
1526        uint64_t bb = *(uint64_t *)(b + i);
1527        uint64_t dd = aa > bb ? aa : bb;
1528        *(uint64_t *)(d + i) = dd;
1529    }
1530    clear_high(d, oprsz, desc);
1531}
1532
1533void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
1534{
1535    intptr_t oprsz = simd_oprsz(desc);
1536    intptr_t i;
1537
1538    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1539        uint64_t aa = *(uint64_t *)(a + i);
1540        uint64_t bb = *(uint64_t *)(b + i);
1541        uint64_t cc = *(uint64_t *)(c + i);
1542        *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
1543    }
1544    clear_high(d, oprsz, desc);
1545}
1546