qemu/accel/tcg/tcg-runtime-gvec.c
<<
>>
Prefs
   1/*
   2 * Generic vectorized operation runtime
   3 *
   4 * Copyright (c) 2018 Linaro
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu/host-utils.h"
  22#include "cpu.h"
  23#include "exec/helper-proto.h"
  24#include "tcg/tcg-gvec-desc.h"
  25
  26
  27static inline void clear_high(void *d, intptr_t oprsz, uint32_t desc)
  28{
  29    intptr_t maxsz = simd_maxsz(desc);
  30    intptr_t i;
  31
  32    if (unlikely(maxsz > oprsz)) {
  33        for (i = oprsz; i < maxsz; i += sizeof(uint64_t)) {
  34            *(uint64_t *)(d + i) = 0;
  35        }
  36    }
  37}
  38
  39void HELPER(gvec_add8)(void *d, void *a, void *b, uint32_t desc)
  40{
  41    intptr_t oprsz = simd_oprsz(desc);
  42    intptr_t i;
  43
  44    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
  45        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
  46    }
  47    clear_high(d, oprsz, desc);
  48}
  49
  50void HELPER(gvec_add16)(void *d, void *a, void *b, uint32_t desc)
  51{
  52    intptr_t oprsz = simd_oprsz(desc);
  53    intptr_t i;
  54
  55    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
  56        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
  57    }
  58    clear_high(d, oprsz, desc);
  59}
  60
  61void HELPER(gvec_add32)(void *d, void *a, void *b, uint32_t desc)
  62{
  63    intptr_t oprsz = simd_oprsz(desc);
  64    intptr_t i;
  65
  66    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
  67        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + *(uint32_t *)(b + i);
  68    }
  69    clear_high(d, oprsz, desc);
  70}
  71
  72void HELPER(gvec_add64)(void *d, void *a, void *b, uint32_t desc)
  73{
  74    intptr_t oprsz = simd_oprsz(desc);
  75    intptr_t i;
  76
  77    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
  78        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + *(uint64_t *)(b + i);
  79    }
  80    clear_high(d, oprsz, desc);
  81}
  82
  83void HELPER(gvec_adds8)(void *d, void *a, uint64_t b, uint32_t desc)
  84{
  85    intptr_t oprsz = simd_oprsz(desc);
  86    intptr_t i;
  87
  88    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
  89        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) + (uint8_t)b;
  90    }
  91    clear_high(d, oprsz, desc);
  92}
  93
  94void HELPER(gvec_adds16)(void *d, void *a, uint64_t b, uint32_t desc)
  95{
  96    intptr_t oprsz = simd_oprsz(desc);
  97    intptr_t i;
  98
  99    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 100        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) + (uint16_t)b;
 101    }
 102    clear_high(d, oprsz, desc);
 103}
 104
 105void HELPER(gvec_adds32)(void *d, void *a, uint64_t b, uint32_t desc)
 106{
 107    intptr_t oprsz = simd_oprsz(desc);
 108    intptr_t i;
 109
 110    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 111        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) + (uint32_t)b;
 112    }
 113    clear_high(d, oprsz, desc);
 114}
 115
 116void HELPER(gvec_adds64)(void *d, void *a, uint64_t b, uint32_t desc)
 117{
 118    intptr_t oprsz = simd_oprsz(desc);
 119    intptr_t i;
 120
 121    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 122        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) + b;
 123    }
 124    clear_high(d, oprsz, desc);
 125}
 126
 127void HELPER(gvec_sub8)(void *d, void *a, void *b, uint32_t desc)
 128{
 129    intptr_t oprsz = simd_oprsz(desc);
 130    intptr_t i;
 131
 132    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 133        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
 134    }
 135    clear_high(d, oprsz, desc);
 136}
 137
 138void HELPER(gvec_sub16)(void *d, void *a, void *b, uint32_t desc)
 139{
 140    intptr_t oprsz = simd_oprsz(desc);
 141    intptr_t i;
 142
 143    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 144        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
 145    }
 146    clear_high(d, oprsz, desc);
 147}
 148
 149void HELPER(gvec_sub32)(void *d, void *a, void *b, uint32_t desc)
 150{
 151    intptr_t oprsz = simd_oprsz(desc);
 152    intptr_t i;
 153
 154    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 155        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - *(uint32_t *)(b + i);
 156    }
 157    clear_high(d, oprsz, desc);
 158}
 159
 160void HELPER(gvec_sub64)(void *d, void *a, void *b, uint32_t desc)
 161{
 162    intptr_t oprsz = simd_oprsz(desc);
 163    intptr_t i;
 164
 165    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 166        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - *(uint64_t *)(b + i);
 167    }
 168    clear_high(d, oprsz, desc);
 169}
 170
 171void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
 172{
 173    intptr_t oprsz = simd_oprsz(desc);
 174    intptr_t i;
 175
 176    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 177        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) - (uint8_t)b;
 178    }
 179    clear_high(d, oprsz, desc);
 180}
 181
 182void HELPER(gvec_subs16)(void *d, void *a, uint64_t b, uint32_t desc)
 183{
 184    intptr_t oprsz = simd_oprsz(desc);
 185    intptr_t i;
 186
 187    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 188        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) - (uint16_t)b;
 189    }
 190    clear_high(d, oprsz, desc);
 191}
 192
 193void HELPER(gvec_subs32)(void *d, void *a, uint64_t b, uint32_t desc)
 194{
 195    intptr_t oprsz = simd_oprsz(desc);
 196    intptr_t i;
 197
 198    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 199        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) - (uint32_t)b;
 200    }
 201    clear_high(d, oprsz, desc);
 202}
 203
 204void HELPER(gvec_subs64)(void *d, void *a, uint64_t b, uint32_t desc)
 205{
 206    intptr_t oprsz = simd_oprsz(desc);
 207    intptr_t i;
 208
 209    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 210        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) - b;
 211    }
 212    clear_high(d, oprsz, desc);
 213}
 214
 215void HELPER(gvec_mul8)(void *d, void *a, void *b, uint32_t desc)
 216{
 217    intptr_t oprsz = simd_oprsz(desc);
 218    intptr_t i;
 219
 220    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 221        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * *(uint8_t *)(b + i);
 222    }
 223    clear_high(d, oprsz, desc);
 224}
 225
 226void HELPER(gvec_mul16)(void *d, void *a, void *b, uint32_t desc)
 227{
 228    intptr_t oprsz = simd_oprsz(desc);
 229    intptr_t i;
 230
 231    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 232        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * *(uint16_t *)(b + i);
 233    }
 234    clear_high(d, oprsz, desc);
 235}
 236
 237void HELPER(gvec_mul32)(void *d, void *a, void *b, uint32_t desc)
 238{
 239    intptr_t oprsz = simd_oprsz(desc);
 240    intptr_t i;
 241
 242    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 243        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * *(uint32_t *)(b + i);
 244    }
 245    clear_high(d, oprsz, desc);
 246}
 247
 248void HELPER(gvec_mul64)(void *d, void *a, void *b, uint32_t desc)
 249{
 250    intptr_t oprsz = simd_oprsz(desc);
 251    intptr_t i;
 252
 253    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 254        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * *(uint64_t *)(b + i);
 255    }
 256    clear_high(d, oprsz, desc);
 257}
 258
 259void HELPER(gvec_muls8)(void *d, void *a, uint64_t b, uint32_t desc)
 260{
 261    intptr_t oprsz = simd_oprsz(desc);
 262    intptr_t i;
 263
 264    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 265        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) * (uint8_t)b;
 266    }
 267    clear_high(d, oprsz, desc);
 268}
 269
 270void HELPER(gvec_muls16)(void *d, void *a, uint64_t b, uint32_t desc)
 271{
 272    intptr_t oprsz = simd_oprsz(desc);
 273    intptr_t i;
 274
 275    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 276        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) * (uint16_t)b;
 277    }
 278    clear_high(d, oprsz, desc);
 279}
 280
 281void HELPER(gvec_muls32)(void *d, void *a, uint64_t b, uint32_t desc)
 282{
 283    intptr_t oprsz = simd_oprsz(desc);
 284    intptr_t i;
 285
 286    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 287        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) * (uint32_t)b;
 288    }
 289    clear_high(d, oprsz, desc);
 290}
 291
 292void HELPER(gvec_muls64)(void *d, void *a, uint64_t b, uint32_t desc)
 293{
 294    intptr_t oprsz = simd_oprsz(desc);
 295    intptr_t i;
 296
 297    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 298        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) * b;
 299    }
 300    clear_high(d, oprsz, desc);
 301}
 302
 303void HELPER(gvec_neg8)(void *d, void *a, uint32_t desc)
 304{
 305    intptr_t oprsz = simd_oprsz(desc);
 306    intptr_t i;
 307
 308    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 309        *(uint8_t *)(d + i) = -*(uint8_t *)(a + i);
 310    }
 311    clear_high(d, oprsz, desc);
 312}
 313
 314void HELPER(gvec_neg16)(void *d, void *a, uint32_t desc)
 315{
 316    intptr_t oprsz = simd_oprsz(desc);
 317    intptr_t i;
 318
 319    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 320        *(uint16_t *)(d + i) = -*(uint16_t *)(a + i);
 321    }
 322    clear_high(d, oprsz, desc);
 323}
 324
 325void HELPER(gvec_neg32)(void *d, void *a, uint32_t desc)
 326{
 327    intptr_t oprsz = simd_oprsz(desc);
 328    intptr_t i;
 329
 330    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 331        *(uint32_t *)(d + i) = -*(uint32_t *)(a + i);
 332    }
 333    clear_high(d, oprsz, desc);
 334}
 335
 336void HELPER(gvec_neg64)(void *d, void *a, uint32_t desc)
 337{
 338    intptr_t oprsz = simd_oprsz(desc);
 339    intptr_t i;
 340
 341    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 342        *(uint64_t *)(d + i) = -*(uint64_t *)(a + i);
 343    }
 344    clear_high(d, oprsz, desc);
 345}
 346
 347void HELPER(gvec_abs8)(void *d, void *a, uint32_t desc)
 348{
 349    intptr_t oprsz = simd_oprsz(desc);
 350    intptr_t i;
 351
 352    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
 353        int8_t aa = *(int8_t *)(a + i);
 354        *(int8_t *)(d + i) = aa < 0 ? -aa : aa;
 355    }
 356    clear_high(d, oprsz, desc);
 357}
 358
 359void HELPER(gvec_abs16)(void *d, void *a, uint32_t desc)
 360{
 361    intptr_t oprsz = simd_oprsz(desc);
 362    intptr_t i;
 363
 364    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
 365        int16_t aa = *(int16_t *)(a + i);
 366        *(int16_t *)(d + i) = aa < 0 ? -aa : aa;
 367    }
 368    clear_high(d, oprsz, desc);
 369}
 370
 371void HELPER(gvec_abs32)(void *d, void *a, uint32_t desc)
 372{
 373    intptr_t oprsz = simd_oprsz(desc);
 374    intptr_t i;
 375
 376    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
 377        int32_t aa = *(int32_t *)(a + i);
 378        *(int32_t *)(d + i) = aa < 0 ? -aa : aa;
 379    }
 380    clear_high(d, oprsz, desc);
 381}
 382
 383void HELPER(gvec_abs64)(void *d, void *a, uint32_t desc)
 384{
 385    intptr_t oprsz = simd_oprsz(desc);
 386    intptr_t i;
 387
 388    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
 389        int64_t aa = *(int64_t *)(a + i);
 390        *(int64_t *)(d + i) = aa < 0 ? -aa : aa;
 391    }
 392    clear_high(d, oprsz, desc);
 393}
 394
 395void HELPER(gvec_mov)(void *d, void *a, uint32_t desc)
 396{
 397    intptr_t oprsz = simd_oprsz(desc);
 398
 399    memcpy(d, a, oprsz);
 400    clear_high(d, oprsz, desc);
 401}
 402
 403void HELPER(gvec_dup64)(void *d, uint32_t desc, uint64_t c)
 404{
 405    intptr_t oprsz = simd_oprsz(desc);
 406    intptr_t i;
 407
 408    if (c == 0) {
 409        oprsz = 0;
 410    } else {
 411        for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 412            *(uint64_t *)(d + i) = c;
 413        }
 414    }
 415    clear_high(d, oprsz, desc);
 416}
 417
 418void HELPER(gvec_dup32)(void *d, uint32_t desc, uint32_t c)
 419{
 420    intptr_t oprsz = simd_oprsz(desc);
 421    intptr_t i;
 422
 423    if (c == 0) {
 424        oprsz = 0;
 425    } else {
 426        for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 427            *(uint32_t *)(d + i) = c;
 428        }
 429    }
 430    clear_high(d, oprsz, desc);
 431}
 432
 433void HELPER(gvec_dup16)(void *d, uint32_t desc, uint32_t c)
 434{
 435    HELPER(gvec_dup32)(d, desc, 0x00010001 * (c & 0xffff));
 436}
 437
 438void HELPER(gvec_dup8)(void *d, uint32_t desc, uint32_t c)
 439{
 440    HELPER(gvec_dup32)(d, desc, 0x01010101 * (c & 0xff));
 441}
 442
 443void HELPER(gvec_not)(void *d, void *a, uint32_t desc)
 444{
 445    intptr_t oprsz = simd_oprsz(desc);
 446    intptr_t i;
 447
 448    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 449        *(uint64_t *)(d + i) = ~*(uint64_t *)(a + i);
 450    }
 451    clear_high(d, oprsz, desc);
 452}
 453
 454void HELPER(gvec_and)(void *d, void *a, void *b, uint32_t desc)
 455{
 456    intptr_t oprsz = simd_oprsz(desc);
 457    intptr_t i;
 458
 459    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 460        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & *(uint64_t *)(b + i);
 461    }
 462    clear_high(d, oprsz, desc);
 463}
 464
 465void HELPER(gvec_or)(void *d, void *a, void *b, uint32_t desc)
 466{
 467    intptr_t oprsz = simd_oprsz(desc);
 468    intptr_t i;
 469
 470    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 471        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | *(uint64_t *)(b + i);
 472    }
 473    clear_high(d, oprsz, desc);
 474}
 475
 476void HELPER(gvec_xor)(void *d, void *a, void *b, uint32_t desc)
 477{
 478    intptr_t oprsz = simd_oprsz(desc);
 479    intptr_t i;
 480
 481    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 482        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ *(uint64_t *)(b + i);
 483    }
 484    clear_high(d, oprsz, desc);
 485}
 486
 487void HELPER(gvec_andc)(void *d, void *a, void *b, uint32_t desc)
 488{
 489    intptr_t oprsz = simd_oprsz(desc);
 490    intptr_t i;
 491
 492    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 493        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) &~ *(uint64_t *)(b + i);
 494    }
 495    clear_high(d, oprsz, desc);
 496}
 497
 498void HELPER(gvec_orc)(void *d, void *a, void *b, uint32_t desc)
 499{
 500    intptr_t oprsz = simd_oprsz(desc);
 501    intptr_t i;
 502
 503    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 504        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) |~ *(uint64_t *)(b + i);
 505    }
 506    clear_high(d, oprsz, desc);
 507}
 508
 509void HELPER(gvec_nand)(void *d, void *a, void *b, uint32_t desc)
 510{
 511    intptr_t oprsz = simd_oprsz(desc);
 512    intptr_t i;
 513
 514    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 515        *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) & *(uint64_t *)(b + i));
 516    }
 517    clear_high(d, oprsz, desc);
 518}
 519
 520void HELPER(gvec_nor)(void *d, void *a, void *b, uint32_t desc)
 521{
 522    intptr_t oprsz = simd_oprsz(desc);
 523    intptr_t i;
 524
 525    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 526        *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) | *(uint64_t *)(b + i));
 527    }
 528    clear_high(d, oprsz, desc);
 529}
 530
 531void HELPER(gvec_eqv)(void *d, void *a, void *b, uint32_t desc)
 532{
 533    intptr_t oprsz = simd_oprsz(desc);
 534    intptr_t i;
 535
 536    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 537        *(uint64_t *)(d + i) = ~(*(uint64_t *)(a + i) ^ *(uint64_t *)(b + i));
 538    }
 539    clear_high(d, oprsz, desc);
 540}
 541
 542void HELPER(gvec_ands)(void *d, void *a, uint64_t b, uint32_t desc)
 543{
 544    intptr_t oprsz = simd_oprsz(desc);
 545    intptr_t i;
 546
 547    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 548        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) & b;
 549    }
 550    clear_high(d, oprsz, desc);
 551}
 552
 553void HELPER(gvec_xors)(void *d, void *a, uint64_t b, uint32_t desc)
 554{
 555    intptr_t oprsz = simd_oprsz(desc);
 556    intptr_t i;
 557
 558    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 559        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) ^ b;
 560    }
 561    clear_high(d, oprsz, desc);
 562}
 563
 564void HELPER(gvec_ors)(void *d, void *a, uint64_t b, uint32_t desc)
 565{
 566    intptr_t oprsz = simd_oprsz(desc);
 567    intptr_t i;
 568
 569    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 570        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) | b;
 571    }
 572    clear_high(d, oprsz, desc);
 573}
 574
 575void HELPER(gvec_shl8i)(void *d, void *a, uint32_t desc)
 576{
 577    intptr_t oprsz = simd_oprsz(desc);
 578    int shift = simd_data(desc);
 579    intptr_t i;
 580
 581    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 582        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << shift;
 583    }
 584    clear_high(d, oprsz, desc);
 585}
 586
 587void HELPER(gvec_shl16i)(void *d, void *a, uint32_t desc)
 588{
 589    intptr_t oprsz = simd_oprsz(desc);
 590    int shift = simd_data(desc);
 591    intptr_t i;
 592
 593    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 594        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << shift;
 595    }
 596    clear_high(d, oprsz, desc);
 597}
 598
 599void HELPER(gvec_shl32i)(void *d, void *a, uint32_t desc)
 600{
 601    intptr_t oprsz = simd_oprsz(desc);
 602    int shift = simd_data(desc);
 603    intptr_t i;
 604
 605    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 606        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << shift;
 607    }
 608    clear_high(d, oprsz, desc);
 609}
 610
 611void HELPER(gvec_shl64i)(void *d, void *a, uint32_t desc)
 612{
 613    intptr_t oprsz = simd_oprsz(desc);
 614    int shift = simd_data(desc);
 615    intptr_t i;
 616
 617    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 618        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << shift;
 619    }
 620    clear_high(d, oprsz, desc);
 621}
 622
 623void HELPER(gvec_shr8i)(void *d, void *a, uint32_t desc)
 624{
 625    intptr_t oprsz = simd_oprsz(desc);
 626    int shift = simd_data(desc);
 627    intptr_t i;
 628
 629    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 630        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> shift;
 631    }
 632    clear_high(d, oprsz, desc);
 633}
 634
 635void HELPER(gvec_shr16i)(void *d, void *a, uint32_t desc)
 636{
 637    intptr_t oprsz = simd_oprsz(desc);
 638    int shift = simd_data(desc);
 639    intptr_t i;
 640
 641    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 642        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> shift;
 643    }
 644    clear_high(d, oprsz, desc);
 645}
 646
 647void HELPER(gvec_shr32i)(void *d, void *a, uint32_t desc)
 648{
 649    intptr_t oprsz = simd_oprsz(desc);
 650    int shift = simd_data(desc);
 651    intptr_t i;
 652
 653    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 654        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> shift;
 655    }
 656    clear_high(d, oprsz, desc);
 657}
 658
 659void HELPER(gvec_shr64i)(void *d, void *a, uint32_t desc)
 660{
 661    intptr_t oprsz = simd_oprsz(desc);
 662    int shift = simd_data(desc);
 663    intptr_t i;
 664
 665    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 666        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> shift;
 667    }
 668    clear_high(d, oprsz, desc);
 669}
 670
 671void HELPER(gvec_sar8i)(void *d, void *a, uint32_t desc)
 672{
 673    intptr_t oprsz = simd_oprsz(desc);
 674    int shift = simd_data(desc);
 675    intptr_t i;
 676
 677    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 678        *(int8_t *)(d + i) = *(int8_t *)(a + i) >> shift;
 679    }
 680    clear_high(d, oprsz, desc);
 681}
 682
 683void HELPER(gvec_sar16i)(void *d, void *a, uint32_t desc)
 684{
 685    intptr_t oprsz = simd_oprsz(desc);
 686    int shift = simd_data(desc);
 687    intptr_t i;
 688
 689    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 690        *(int16_t *)(d + i) = *(int16_t *)(a + i) >> shift;
 691    }
 692    clear_high(d, oprsz, desc);
 693}
 694
 695void HELPER(gvec_sar32i)(void *d, void *a, uint32_t desc)
 696{
 697    intptr_t oprsz = simd_oprsz(desc);
 698    int shift = simd_data(desc);
 699    intptr_t i;
 700
 701    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 702        *(int32_t *)(d + i) = *(int32_t *)(a + i) >> shift;
 703    }
 704    clear_high(d, oprsz, desc);
 705}
 706
 707void HELPER(gvec_sar64i)(void *d, void *a, uint32_t desc)
 708{
 709    intptr_t oprsz = simd_oprsz(desc);
 710    int shift = simd_data(desc);
 711    intptr_t i;
 712
 713    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 714        *(int64_t *)(d + i) = *(int64_t *)(a + i) >> shift;
 715    }
 716    clear_high(d, oprsz, desc);
 717}
 718
 719void HELPER(gvec_rotl8i)(void *d, void *a, uint32_t desc)
 720{
 721    intptr_t oprsz = simd_oprsz(desc);
 722    int shift = simd_data(desc);
 723    intptr_t i;
 724
 725    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 726        *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), shift);
 727    }
 728    clear_high(d, oprsz, desc);
 729}
 730
 731void HELPER(gvec_rotl16i)(void *d, void *a, uint32_t desc)
 732{
 733    intptr_t oprsz = simd_oprsz(desc);
 734    int shift = simd_data(desc);
 735    intptr_t i;
 736
 737    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 738        *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), shift);
 739    }
 740    clear_high(d, oprsz, desc);
 741}
 742
 743void HELPER(gvec_rotl32i)(void *d, void *a, uint32_t desc)
 744{
 745    intptr_t oprsz = simd_oprsz(desc);
 746    int shift = simd_data(desc);
 747    intptr_t i;
 748
 749    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 750        *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), shift);
 751    }
 752    clear_high(d, oprsz, desc);
 753}
 754
 755void HELPER(gvec_rotl64i)(void *d, void *a, uint32_t desc)
 756{
 757    intptr_t oprsz = simd_oprsz(desc);
 758    int shift = simd_data(desc);
 759    intptr_t i;
 760
 761    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 762        *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), shift);
 763    }
 764    clear_high(d, oprsz, desc);
 765}
 766
 767void HELPER(gvec_shl8v)(void *d, void *a, void *b, uint32_t desc)
 768{
 769    intptr_t oprsz = simd_oprsz(desc);
 770    intptr_t i;
 771
 772    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 773        uint8_t sh = *(uint8_t *)(b + i) & 7;
 774        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) << sh;
 775    }
 776    clear_high(d, oprsz, desc);
 777}
 778
 779void HELPER(gvec_shl16v)(void *d, void *a, void *b, uint32_t desc)
 780{
 781    intptr_t oprsz = simd_oprsz(desc);
 782    intptr_t i;
 783
 784    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 785        uint8_t sh = *(uint16_t *)(b + i) & 15;
 786        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) << sh;
 787    }
 788    clear_high(d, oprsz, desc);
 789}
 790
 791void HELPER(gvec_shl32v)(void *d, void *a, void *b, uint32_t desc)
 792{
 793    intptr_t oprsz = simd_oprsz(desc);
 794    intptr_t i;
 795
 796    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 797        uint8_t sh = *(uint32_t *)(b + i) & 31;
 798        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) << sh;
 799    }
 800    clear_high(d, oprsz, desc);
 801}
 802
 803void HELPER(gvec_shl64v)(void *d, void *a, void *b, uint32_t desc)
 804{
 805    intptr_t oprsz = simd_oprsz(desc);
 806    intptr_t i;
 807
 808    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 809        uint8_t sh = *(uint64_t *)(b + i) & 63;
 810        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) << sh;
 811    }
 812    clear_high(d, oprsz, desc);
 813}
 814
 815void HELPER(gvec_shr8v)(void *d, void *a, void *b, uint32_t desc)
 816{
 817    intptr_t oprsz = simd_oprsz(desc);
 818    intptr_t i;
 819
 820    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 821        uint8_t sh = *(uint8_t *)(b + i) & 7;
 822        *(uint8_t *)(d + i) = *(uint8_t *)(a + i) >> sh;
 823    }
 824    clear_high(d, oprsz, desc);
 825}
 826
 827void HELPER(gvec_shr16v)(void *d, void *a, void *b, uint32_t desc)
 828{
 829    intptr_t oprsz = simd_oprsz(desc);
 830    intptr_t i;
 831
 832    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 833        uint8_t sh = *(uint16_t *)(b + i) & 15;
 834        *(uint16_t *)(d + i) = *(uint16_t *)(a + i) >> sh;
 835    }
 836    clear_high(d, oprsz, desc);
 837}
 838
 839void HELPER(gvec_shr32v)(void *d, void *a, void *b, uint32_t desc)
 840{
 841    intptr_t oprsz = simd_oprsz(desc);
 842    intptr_t i;
 843
 844    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 845        uint8_t sh = *(uint32_t *)(b + i) & 31;
 846        *(uint32_t *)(d + i) = *(uint32_t *)(a + i) >> sh;
 847    }
 848    clear_high(d, oprsz, desc);
 849}
 850
 851void HELPER(gvec_shr64v)(void *d, void *a, void *b, uint32_t desc)
 852{
 853    intptr_t oprsz = simd_oprsz(desc);
 854    intptr_t i;
 855
 856    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 857        uint8_t sh = *(uint64_t *)(b + i) & 63;
 858        *(uint64_t *)(d + i) = *(uint64_t *)(a + i) >> sh;
 859    }
 860    clear_high(d, oprsz, desc);
 861}
 862
 863void HELPER(gvec_sar8v)(void *d, void *a, void *b, uint32_t desc)
 864{
 865    intptr_t oprsz = simd_oprsz(desc);
 866    intptr_t i;
 867
 868    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
 869        uint8_t sh = *(uint8_t *)(b + i) & 7;
 870        *(int8_t *)(d + i) = *(int8_t *)(a + i) >> sh;
 871    }
 872    clear_high(d, oprsz, desc);
 873}
 874
 875void HELPER(gvec_sar16v)(void *d, void *a, void *b, uint32_t desc)
 876{
 877    intptr_t oprsz = simd_oprsz(desc);
 878    intptr_t i;
 879
 880    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
 881        uint8_t sh = *(uint16_t *)(b + i) & 15;
 882        *(int16_t *)(d + i) = *(int16_t *)(a + i) >> sh;
 883    }
 884    clear_high(d, oprsz, desc);
 885}
 886
 887void HELPER(gvec_sar32v)(void *d, void *a, void *b, uint32_t desc)
 888{
 889    intptr_t oprsz = simd_oprsz(desc);
 890    intptr_t i;
 891
 892    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
 893        uint8_t sh = *(uint32_t *)(b + i) & 31;
 894        *(int32_t *)(d + i) = *(int32_t *)(a + i) >> sh;
 895    }
 896    clear_high(d, oprsz, desc);
 897}
 898
 899void HELPER(gvec_sar64v)(void *d, void *a, void *b, uint32_t desc)
 900{
 901    intptr_t oprsz = simd_oprsz(desc);
 902    intptr_t i;
 903
 904    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
 905        uint8_t sh = *(uint64_t *)(b + i) & 63;
 906        *(int64_t *)(d + i) = *(int64_t *)(a + i) >> sh;
 907    }
 908    clear_high(d, oprsz, desc);
 909}
 910
 911void HELPER(gvec_rotl8v)(void *d, void *a, void *b, uint32_t desc)
 912{
 913    intptr_t oprsz = simd_oprsz(desc);
 914    intptr_t i;
 915
 916    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 917        uint8_t sh = *(uint8_t *)(b + i) & 7;
 918        *(uint8_t *)(d + i) = rol8(*(uint8_t *)(a + i), sh);
 919    }
 920    clear_high(d, oprsz, desc);
 921}
 922
 923void HELPER(gvec_rotl16v)(void *d, void *a, void *b, uint32_t desc)
 924{
 925    intptr_t oprsz = simd_oprsz(desc);
 926    intptr_t i;
 927
 928    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 929        uint8_t sh = *(uint16_t *)(b + i) & 15;
 930        *(uint16_t *)(d + i) = rol16(*(uint16_t *)(a + i), sh);
 931    }
 932    clear_high(d, oprsz, desc);
 933}
 934
 935void HELPER(gvec_rotl32v)(void *d, void *a, void *b, uint32_t desc)
 936{
 937    intptr_t oprsz = simd_oprsz(desc);
 938    intptr_t i;
 939
 940    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 941        uint8_t sh = *(uint32_t *)(b + i) & 31;
 942        *(uint32_t *)(d + i) = rol32(*(uint32_t *)(a + i), sh);
 943    }
 944    clear_high(d, oprsz, desc);
 945}
 946
 947void HELPER(gvec_rotl64v)(void *d, void *a, void *b, uint32_t desc)
 948{
 949    intptr_t oprsz = simd_oprsz(desc);
 950    intptr_t i;
 951
 952    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
 953        uint8_t sh = *(uint64_t *)(b + i) & 63;
 954        *(uint64_t *)(d + i) = rol64(*(uint64_t *)(a + i), sh);
 955    }
 956    clear_high(d, oprsz, desc);
 957}
 958
 959void HELPER(gvec_rotr8v)(void *d, void *a, void *b, uint32_t desc)
 960{
 961    intptr_t oprsz = simd_oprsz(desc);
 962    intptr_t i;
 963
 964    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
 965        uint8_t sh = *(uint8_t *)(b + i) & 7;
 966        *(uint8_t *)(d + i) = ror8(*(uint8_t *)(a + i), sh);
 967    }
 968    clear_high(d, oprsz, desc);
 969}
 970
 971void HELPER(gvec_rotr16v)(void *d, void *a, void *b, uint32_t desc)
 972{
 973    intptr_t oprsz = simd_oprsz(desc);
 974    intptr_t i;
 975
 976    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
 977        uint8_t sh = *(uint16_t *)(b + i) & 15;
 978        *(uint16_t *)(d + i) = ror16(*(uint16_t *)(a + i), sh);
 979    }
 980    clear_high(d, oprsz, desc);
 981}
 982
 983void HELPER(gvec_rotr32v)(void *d, void *a, void *b, uint32_t desc)
 984{
 985    intptr_t oprsz = simd_oprsz(desc);
 986    intptr_t i;
 987
 988    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
 989        uint8_t sh = *(uint32_t *)(b + i) & 31;
 990        *(uint32_t *)(d + i) = ror32(*(uint32_t *)(a + i), sh);
 991    }
 992    clear_high(d, oprsz, desc);
 993}
 994
 995void HELPER(gvec_rotr64v)(void *d, void *a, void *b, uint32_t desc)
 996{
 997    intptr_t oprsz = simd_oprsz(desc);
 998    intptr_t i;
 999
1000    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1001        uint8_t sh = *(uint64_t *)(b + i) & 63;
1002        *(uint64_t *)(d + i) = ror64(*(uint64_t *)(a + i), sh);
1003    }
1004    clear_high(d, oprsz, desc);
1005}
1006
1007#define DO_CMP1(NAME, TYPE, OP)                                            \
1008void HELPER(NAME)(void *d, void *a, void *b, uint32_t desc)                \
1009{                                                                          \
1010    intptr_t oprsz = simd_oprsz(desc);                                     \
1011    intptr_t i;                                                            \
1012    for (i = 0; i < oprsz; i += sizeof(TYPE)) {                            \
1013        *(TYPE *)(d + i) = -(*(TYPE *)(a + i) OP *(TYPE *)(b + i));        \
1014    }                                                                      \
1015    clear_high(d, oprsz, desc);                                            \
1016}
1017
1018#define DO_CMP2(SZ) \
1019    DO_CMP1(gvec_eq##SZ, uint##SZ##_t, ==)    \
1020    DO_CMP1(gvec_ne##SZ, uint##SZ##_t, !=)    \
1021    DO_CMP1(gvec_lt##SZ, int##SZ##_t, <)      \
1022    DO_CMP1(gvec_le##SZ, int##SZ##_t, <=)     \
1023    DO_CMP1(gvec_ltu##SZ, uint##SZ##_t, <)    \
1024    DO_CMP1(gvec_leu##SZ, uint##SZ##_t, <=)
1025
1026DO_CMP2(8)
1027DO_CMP2(16)
1028DO_CMP2(32)
1029DO_CMP2(64)
1030
1031#undef DO_CMP1
1032#undef DO_CMP2
1033
1034void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
1035{
1036    intptr_t oprsz = simd_oprsz(desc);
1037    intptr_t i;
1038
1039    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1040        int r = *(int8_t *)(a + i) + *(int8_t *)(b + i);
1041        if (r > INT8_MAX) {
1042            r = INT8_MAX;
1043        } else if (r < INT8_MIN) {
1044            r = INT8_MIN;
1045        }
1046        *(int8_t *)(d + i) = r;
1047    }
1048    clear_high(d, oprsz, desc);
1049}
1050
1051void HELPER(gvec_ssadd16)(void *d, void *a, void *b, uint32_t desc)
1052{
1053    intptr_t oprsz = simd_oprsz(desc);
1054    intptr_t i;
1055
1056    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1057        int r = *(int16_t *)(a + i) + *(int16_t *)(b + i);
1058        if (r > INT16_MAX) {
1059            r = INT16_MAX;
1060        } else if (r < INT16_MIN) {
1061            r = INT16_MIN;
1062        }
1063        *(int16_t *)(d + i) = r;
1064    }
1065    clear_high(d, oprsz, desc);
1066}
1067
1068void HELPER(gvec_ssadd32)(void *d, void *a, void *b, uint32_t desc)
1069{
1070    intptr_t oprsz = simd_oprsz(desc);
1071    intptr_t i;
1072
1073    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1074        int32_t ai = *(int32_t *)(a + i);
1075        int32_t bi = *(int32_t *)(b + i);
1076        int32_t di;
1077        if (sadd32_overflow(ai, bi, &di)) {
1078            di = (di < 0 ? INT32_MAX : INT32_MIN);
1079        }
1080        *(int32_t *)(d + i) = di;
1081    }
1082    clear_high(d, oprsz, desc);
1083}
1084
1085void HELPER(gvec_ssadd64)(void *d, void *a, void *b, uint32_t desc)
1086{
1087    intptr_t oprsz = simd_oprsz(desc);
1088    intptr_t i;
1089
1090    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1091        int64_t ai = *(int64_t *)(a + i);
1092        int64_t bi = *(int64_t *)(b + i);
1093        int64_t di;
1094        if (sadd64_overflow(ai, bi, &di)) {
1095            di = (di < 0 ? INT64_MAX : INT64_MIN);
1096        }
1097        *(int64_t *)(d + i) = di;
1098    }
1099    clear_high(d, oprsz, desc);
1100}
1101
1102void HELPER(gvec_sssub8)(void *d, void *a, void *b, uint32_t desc)
1103{
1104    intptr_t oprsz = simd_oprsz(desc);
1105    intptr_t i;
1106
1107    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1108        int r = *(int8_t *)(a + i) - *(int8_t *)(b + i);
1109        if (r > INT8_MAX) {
1110            r = INT8_MAX;
1111        } else if (r < INT8_MIN) {
1112            r = INT8_MIN;
1113        }
1114        *(uint8_t *)(d + i) = r;
1115    }
1116    clear_high(d, oprsz, desc);
1117}
1118
1119void HELPER(gvec_sssub16)(void *d, void *a, void *b, uint32_t desc)
1120{
1121    intptr_t oprsz = simd_oprsz(desc);
1122    intptr_t i;
1123
1124    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1125        int r = *(int16_t *)(a + i) - *(int16_t *)(b + i);
1126        if (r > INT16_MAX) {
1127            r = INT16_MAX;
1128        } else if (r < INT16_MIN) {
1129            r = INT16_MIN;
1130        }
1131        *(int16_t *)(d + i) = r;
1132    }
1133    clear_high(d, oprsz, desc);
1134}
1135
1136void HELPER(gvec_sssub32)(void *d, void *a, void *b, uint32_t desc)
1137{
1138    intptr_t oprsz = simd_oprsz(desc);
1139    intptr_t i;
1140
1141    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1142        int32_t ai = *(int32_t *)(a + i);
1143        int32_t bi = *(int32_t *)(b + i);
1144        int32_t di;
1145        if (ssub32_overflow(ai, bi, &di)) {
1146            di = (di < 0 ? INT32_MAX : INT32_MIN);
1147        }
1148        *(int32_t *)(d + i) = di;
1149    }
1150    clear_high(d, oprsz, desc);
1151}
1152
1153void HELPER(gvec_sssub64)(void *d, void *a, void *b, uint32_t desc)
1154{
1155    intptr_t oprsz = simd_oprsz(desc);
1156    intptr_t i;
1157
1158    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1159        int64_t ai = *(int64_t *)(a + i);
1160        int64_t bi = *(int64_t *)(b + i);
1161        int64_t di;
1162        if (ssub64_overflow(ai, bi, &di)) {
1163            di = (di < 0 ? INT64_MAX : INT64_MIN);
1164        }
1165        *(int64_t *)(d + i) = di;
1166    }
1167    clear_high(d, oprsz, desc);
1168}
1169
1170void HELPER(gvec_usadd8)(void *d, void *a, void *b, uint32_t desc)
1171{
1172    intptr_t oprsz = simd_oprsz(desc);
1173    intptr_t i;
1174
1175    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1176        unsigned r = *(uint8_t *)(a + i) + *(uint8_t *)(b + i);
1177        if (r > UINT8_MAX) {
1178            r = UINT8_MAX;
1179        }
1180        *(uint8_t *)(d + i) = r;
1181    }
1182    clear_high(d, oprsz, desc);
1183}
1184
1185void HELPER(gvec_usadd16)(void *d, void *a, void *b, uint32_t desc)
1186{
1187    intptr_t oprsz = simd_oprsz(desc);
1188    intptr_t i;
1189
1190    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1191        unsigned r = *(uint16_t *)(a + i) + *(uint16_t *)(b + i);
1192        if (r > UINT16_MAX) {
1193            r = UINT16_MAX;
1194        }
1195        *(uint16_t *)(d + i) = r;
1196    }
1197    clear_high(d, oprsz, desc);
1198}
1199
1200void HELPER(gvec_usadd32)(void *d, void *a, void *b, uint32_t desc)
1201{
1202    intptr_t oprsz = simd_oprsz(desc);
1203    intptr_t i;
1204
1205    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1206        uint32_t ai = *(uint32_t *)(a + i);
1207        uint32_t bi = *(uint32_t *)(b + i);
1208        uint32_t di;
1209        if (uadd32_overflow(ai, bi, &di)) {
1210            di = UINT32_MAX;
1211        }
1212        *(uint32_t *)(d + i) = di;
1213    }
1214    clear_high(d, oprsz, desc);
1215}
1216
1217void HELPER(gvec_usadd64)(void *d, void *a, void *b, uint32_t desc)
1218{
1219    intptr_t oprsz = simd_oprsz(desc);
1220    intptr_t i;
1221
1222    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1223        uint64_t ai = *(uint64_t *)(a + i);
1224        uint64_t bi = *(uint64_t *)(b + i);
1225        uint64_t di;
1226        if (uadd64_overflow(ai, bi, &di)) {
1227            di = UINT64_MAX;
1228        }
1229        *(uint64_t *)(d + i) = di;
1230    }
1231    clear_high(d, oprsz, desc);
1232}
1233
1234void HELPER(gvec_ussub8)(void *d, void *a, void *b, uint32_t desc)
1235{
1236    intptr_t oprsz = simd_oprsz(desc);
1237    intptr_t i;
1238
1239    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1240        int r = *(uint8_t *)(a + i) - *(uint8_t *)(b + i);
1241        if (r < 0) {
1242            r = 0;
1243        }
1244        *(uint8_t *)(d + i) = r;
1245    }
1246    clear_high(d, oprsz, desc);
1247}
1248
1249void HELPER(gvec_ussub16)(void *d, void *a, void *b, uint32_t desc)
1250{
1251    intptr_t oprsz = simd_oprsz(desc);
1252    intptr_t i;
1253
1254    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1255        int r = *(uint16_t *)(a + i) - *(uint16_t *)(b + i);
1256        if (r < 0) {
1257            r = 0;
1258        }
1259        *(uint16_t *)(d + i) = r;
1260    }
1261    clear_high(d, oprsz, desc);
1262}
1263
1264void HELPER(gvec_ussub32)(void *d, void *a, void *b, uint32_t desc)
1265{
1266    intptr_t oprsz = simd_oprsz(desc);
1267    intptr_t i;
1268
1269    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1270        uint32_t ai = *(uint32_t *)(a + i);
1271        uint32_t bi = *(uint32_t *)(b + i);
1272        uint32_t di;
1273        if (usub32_overflow(ai, bi, &di)) {
1274            di = 0;
1275        }
1276        *(uint32_t *)(d + i) = di;
1277    }
1278    clear_high(d, oprsz, desc);
1279}
1280
1281void HELPER(gvec_ussub64)(void *d, void *a, void *b, uint32_t desc)
1282{
1283    intptr_t oprsz = simd_oprsz(desc);
1284    intptr_t i;
1285
1286    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1287        uint64_t ai = *(uint64_t *)(a + i);
1288        uint64_t bi = *(uint64_t *)(b + i);
1289        uint64_t di;
1290        if (usub64_overflow(ai, bi, &di)) {
1291            di = 0;
1292        }
1293        *(uint64_t *)(d + i) = di;
1294    }
1295    clear_high(d, oprsz, desc);
1296}
1297
1298void HELPER(gvec_smin8)(void *d, void *a, void *b, uint32_t desc)
1299{
1300    intptr_t oprsz = simd_oprsz(desc);
1301    intptr_t i;
1302
1303    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1304        int8_t aa = *(int8_t *)(a + i);
1305        int8_t bb = *(int8_t *)(b + i);
1306        int8_t dd = aa < bb ? aa : bb;
1307        *(int8_t *)(d + i) = dd;
1308    }
1309    clear_high(d, oprsz, desc);
1310}
1311
1312void HELPER(gvec_smin16)(void *d, void *a, void *b, uint32_t desc)
1313{
1314    intptr_t oprsz = simd_oprsz(desc);
1315    intptr_t i;
1316
1317    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1318        int16_t aa = *(int16_t *)(a + i);
1319        int16_t bb = *(int16_t *)(b + i);
1320        int16_t dd = aa < bb ? aa : bb;
1321        *(int16_t *)(d + i) = dd;
1322    }
1323    clear_high(d, oprsz, desc);
1324}
1325
1326void HELPER(gvec_smin32)(void *d, void *a, void *b, uint32_t desc)
1327{
1328    intptr_t oprsz = simd_oprsz(desc);
1329    intptr_t i;
1330
1331    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1332        int32_t aa = *(int32_t *)(a + i);
1333        int32_t bb = *(int32_t *)(b + i);
1334        int32_t dd = aa < bb ? aa : bb;
1335        *(int32_t *)(d + i) = dd;
1336    }
1337    clear_high(d, oprsz, desc);
1338}
1339
1340void HELPER(gvec_smin64)(void *d, void *a, void *b, uint32_t desc)
1341{
1342    intptr_t oprsz = simd_oprsz(desc);
1343    intptr_t i;
1344
1345    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1346        int64_t aa = *(int64_t *)(a + i);
1347        int64_t bb = *(int64_t *)(b + i);
1348        int64_t dd = aa < bb ? aa : bb;
1349        *(int64_t *)(d + i) = dd;
1350    }
1351    clear_high(d, oprsz, desc);
1352}
1353
1354void HELPER(gvec_smax8)(void *d, void *a, void *b, uint32_t desc)
1355{
1356    intptr_t oprsz = simd_oprsz(desc);
1357    intptr_t i;
1358
1359    for (i = 0; i < oprsz; i += sizeof(int8_t)) {
1360        int8_t aa = *(int8_t *)(a + i);
1361        int8_t bb = *(int8_t *)(b + i);
1362        int8_t dd = aa > bb ? aa : bb;
1363        *(int8_t *)(d + i) = dd;
1364    }
1365    clear_high(d, oprsz, desc);
1366}
1367
1368void HELPER(gvec_smax16)(void *d, void *a, void *b, uint32_t desc)
1369{
1370    intptr_t oprsz = simd_oprsz(desc);
1371    intptr_t i;
1372
1373    for (i = 0; i < oprsz; i += sizeof(int16_t)) {
1374        int16_t aa = *(int16_t *)(a + i);
1375        int16_t bb = *(int16_t *)(b + i);
1376        int16_t dd = aa > bb ? aa : bb;
1377        *(int16_t *)(d + i) = dd;
1378    }
1379    clear_high(d, oprsz, desc);
1380}
1381
1382void HELPER(gvec_smax32)(void *d, void *a, void *b, uint32_t desc)
1383{
1384    intptr_t oprsz = simd_oprsz(desc);
1385    intptr_t i;
1386
1387    for (i = 0; i < oprsz; i += sizeof(int32_t)) {
1388        int32_t aa = *(int32_t *)(a + i);
1389        int32_t bb = *(int32_t *)(b + i);
1390        int32_t dd = aa > bb ? aa : bb;
1391        *(int32_t *)(d + i) = dd;
1392    }
1393    clear_high(d, oprsz, desc);
1394}
1395
1396void HELPER(gvec_smax64)(void *d, void *a, void *b, uint32_t desc)
1397{
1398    intptr_t oprsz = simd_oprsz(desc);
1399    intptr_t i;
1400
1401    for (i = 0; i < oprsz; i += sizeof(int64_t)) {
1402        int64_t aa = *(int64_t *)(a + i);
1403        int64_t bb = *(int64_t *)(b + i);
1404        int64_t dd = aa > bb ? aa : bb;
1405        *(int64_t *)(d + i) = dd;
1406    }
1407    clear_high(d, oprsz, desc);
1408}
1409
1410void HELPER(gvec_umin8)(void *d, void *a, void *b, uint32_t desc)
1411{
1412    intptr_t oprsz = simd_oprsz(desc);
1413    intptr_t i;
1414
1415    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1416        uint8_t aa = *(uint8_t *)(a + i);
1417        uint8_t bb = *(uint8_t *)(b + i);
1418        uint8_t dd = aa < bb ? aa : bb;
1419        *(uint8_t *)(d + i) = dd;
1420    }
1421    clear_high(d, oprsz, desc);
1422}
1423
1424void HELPER(gvec_umin16)(void *d, void *a, void *b, uint32_t desc)
1425{
1426    intptr_t oprsz = simd_oprsz(desc);
1427    intptr_t i;
1428
1429    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1430        uint16_t aa = *(uint16_t *)(a + i);
1431        uint16_t bb = *(uint16_t *)(b + i);
1432        uint16_t dd = aa < bb ? aa : bb;
1433        *(uint16_t *)(d + i) = dd;
1434    }
1435    clear_high(d, oprsz, desc);
1436}
1437
1438void HELPER(gvec_umin32)(void *d, void *a, void *b, uint32_t desc)
1439{
1440    intptr_t oprsz = simd_oprsz(desc);
1441    intptr_t i;
1442
1443    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1444        uint32_t aa = *(uint32_t *)(a + i);
1445        uint32_t bb = *(uint32_t *)(b + i);
1446        uint32_t dd = aa < bb ? aa : bb;
1447        *(uint32_t *)(d + i) = dd;
1448    }
1449    clear_high(d, oprsz, desc);
1450}
1451
1452void HELPER(gvec_umin64)(void *d, void *a, void *b, uint32_t desc)
1453{
1454    intptr_t oprsz = simd_oprsz(desc);
1455    intptr_t i;
1456
1457    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1458        uint64_t aa = *(uint64_t *)(a + i);
1459        uint64_t bb = *(uint64_t *)(b + i);
1460        uint64_t dd = aa < bb ? aa : bb;
1461        *(uint64_t *)(d + i) = dd;
1462    }
1463    clear_high(d, oprsz, desc);
1464}
1465
1466void HELPER(gvec_umax8)(void *d, void *a, void *b, uint32_t desc)
1467{
1468    intptr_t oprsz = simd_oprsz(desc);
1469    intptr_t i;
1470
1471    for (i = 0; i < oprsz; i += sizeof(uint8_t)) {
1472        uint8_t aa = *(uint8_t *)(a + i);
1473        uint8_t bb = *(uint8_t *)(b + i);
1474        uint8_t dd = aa > bb ? aa : bb;
1475        *(uint8_t *)(d + i) = dd;
1476    }
1477    clear_high(d, oprsz, desc);
1478}
1479
1480void HELPER(gvec_umax16)(void *d, void *a, void *b, uint32_t desc)
1481{
1482    intptr_t oprsz = simd_oprsz(desc);
1483    intptr_t i;
1484
1485    for (i = 0; i < oprsz; i += sizeof(uint16_t)) {
1486        uint16_t aa = *(uint16_t *)(a + i);
1487        uint16_t bb = *(uint16_t *)(b + i);
1488        uint16_t dd = aa > bb ? aa : bb;
1489        *(uint16_t *)(d + i) = dd;
1490    }
1491    clear_high(d, oprsz, desc);
1492}
1493
1494void HELPER(gvec_umax32)(void *d, void *a, void *b, uint32_t desc)
1495{
1496    intptr_t oprsz = simd_oprsz(desc);
1497    intptr_t i;
1498
1499    for (i = 0; i < oprsz; i += sizeof(uint32_t)) {
1500        uint32_t aa = *(uint32_t *)(a + i);
1501        uint32_t bb = *(uint32_t *)(b + i);
1502        uint32_t dd = aa > bb ? aa : bb;
1503        *(uint32_t *)(d + i) = dd;
1504    }
1505    clear_high(d, oprsz, desc);
1506}
1507
1508void HELPER(gvec_umax64)(void *d, void *a, void *b, uint32_t desc)
1509{
1510    intptr_t oprsz = simd_oprsz(desc);
1511    intptr_t i;
1512
1513    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1514        uint64_t aa = *(uint64_t *)(a + i);
1515        uint64_t bb = *(uint64_t *)(b + i);
1516        uint64_t dd = aa > bb ? aa : bb;
1517        *(uint64_t *)(d + i) = dd;
1518    }
1519    clear_high(d, oprsz, desc);
1520}
1521
1522void HELPER(gvec_bitsel)(void *d, void *a, void *b, void *c, uint32_t desc)
1523{
1524    intptr_t oprsz = simd_oprsz(desc);
1525    intptr_t i;
1526
1527    for (i = 0; i < oprsz; i += sizeof(uint64_t)) {
1528        uint64_t aa = *(uint64_t *)(a + i);
1529        uint64_t bb = *(uint64_t *)(b + i);
1530        uint64_t cc = *(uint64_t *)(c + i);
1531        *(uint64_t *)(d + i) = (bb & aa) | (cc & ~aa);
1532    }
1533    clear_high(d, oprsz, desc);
1534}
1535