qemu/target/s390x/tcg/vec_int_helper.c
<<
>>
Prefs
   1/*
   2 * QEMU TCG support -- s390x vector integer instruction support
   3 *
   4 * Copyright (C) 2019 Red Hat Inc
   5 *
   6 * Authors:
   7 *   David Hildenbrand <david@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12#include "qemu/osdep.h"
  13#include "cpu.h"
  14#include "vec.h"
  15#include "exec/helper-proto.h"
  16#include "tcg/tcg-gvec-desc.h"
  17
  18static bool s390_vec_is_zero(const S390Vector *v)
  19{
  20    return !v->doubleword[0] && !v->doubleword[1];
  21}
  22
  23static void s390_vec_xor(S390Vector *res, const S390Vector *a,
  24                         const S390Vector *b)
  25{
  26    res->doubleword[0] = a->doubleword[0] ^ b->doubleword[0];
  27    res->doubleword[1] = a->doubleword[1] ^ b->doubleword[1];
  28}
  29
  30static void s390_vec_and(S390Vector *res, const S390Vector *a,
  31                         const S390Vector *b)
  32{
  33    res->doubleword[0] = a->doubleword[0] & b->doubleword[0];
  34    res->doubleword[1] = a->doubleword[1] & b->doubleword[1];
  35}
  36
  37static bool s390_vec_equal(const S390Vector *a, const S390Vector *b)
  38{
  39    return a->doubleword[0] == b->doubleword[0] &&
  40           a->doubleword[1] == b->doubleword[1];
  41}
  42
  43static void s390_vec_shl(S390Vector *d, const S390Vector *a, uint64_t count)
  44{
  45    uint64_t tmp;
  46
  47    g_assert(count < 128);
  48    if (count == 0) {
  49        d->doubleword[0] = a->doubleword[0];
  50        d->doubleword[1] = a->doubleword[1];
  51    } else if (count == 64) {
  52        d->doubleword[0] = a->doubleword[1];
  53        d->doubleword[1] = 0;
  54    } else if (count < 64) {
  55        tmp = extract64(a->doubleword[1], 64 - count, count);
  56        d->doubleword[1] = a->doubleword[1] << count;
  57        d->doubleword[0] = (a->doubleword[0] << count) | tmp;
  58    } else {
  59        d->doubleword[0] = a->doubleword[1] << (count - 64);
  60        d->doubleword[1] = 0;
  61    }
  62}
  63
  64static void s390_vec_sar(S390Vector *d, const S390Vector *a, uint64_t count)
  65{
  66    uint64_t tmp;
  67
  68    if (count == 0) {
  69        d->doubleword[0] = a->doubleword[0];
  70        d->doubleword[1] = a->doubleword[1];
  71    } else if (count == 64) {
  72        tmp = (int64_t)a->doubleword[0] >> 63;
  73        d->doubleword[1] = a->doubleword[0];
  74        d->doubleword[0] = tmp;
  75    } else if (count < 64) {
  76        tmp = a->doubleword[1] >> count;
  77        d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
  78        d->doubleword[0] = (int64_t)a->doubleword[0] >> count;
  79    } else {
  80        tmp = (int64_t)a->doubleword[0] >> 63;
  81        d->doubleword[1] = (int64_t)a->doubleword[0] >> (count - 64);
  82        d->doubleword[0] = tmp;
  83    }
  84}
  85
  86static void s390_vec_shr(S390Vector *d, const S390Vector *a, uint64_t count)
  87{
  88    uint64_t tmp;
  89
  90    g_assert(count < 128);
  91    if (count == 0) {
  92        d->doubleword[0] = a->doubleword[0];
  93        d->doubleword[1] = a->doubleword[1];
  94    } else if (count == 64) {
  95        d->doubleword[1] = a->doubleword[0];
  96        d->doubleword[0] = 0;
  97    } else if (count < 64) {
  98        tmp = a->doubleword[1] >> count;
  99        d->doubleword[1] = deposit64(tmp, 64 - count, count, a->doubleword[0]);
 100        d->doubleword[0] = a->doubleword[0] >> count;
 101    } else {
 102        d->doubleword[1] = a->doubleword[0] >> (count - 64);
 103        d->doubleword[0] = 0;
 104    }
 105}
 106#define DEF_VAVG(BITS)                                                         \
 107void HELPER(gvec_vavg##BITS)(void *v1, const void *v2, const void *v3,         \
 108                             uint32_t desc)                                    \
 109{                                                                              \
 110    int i;                                                                     \
 111                                                                               \
 112    for (i = 0; i < (128 / BITS); i++) {                                       \
 113        const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
 114        const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
 115                                                                               \
 116        s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
 117    }                                                                          \
 118}
 119DEF_VAVG(8)
 120DEF_VAVG(16)
 121
 122#define DEF_VAVGL(BITS)                                                        \
 123void HELPER(gvec_vavgl##BITS)(void *v1, const void *v2, const void *v3,        \
 124                              uint32_t desc)                                   \
 125{                                                                              \
 126    int i;                                                                     \
 127                                                                               \
 128    for (i = 0; i < (128 / BITS); i++) {                                       \
 129        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
 130        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
 131                                                                               \
 132        s390_vec_write_element##BITS(v1, i, (a + b + 1) >> 1);                 \
 133    }                                                                          \
 134}
 135DEF_VAVGL(8)
 136DEF_VAVGL(16)
 137
 138#define DEF_VCLZ(BITS)                                                         \
 139void HELPER(gvec_vclz##BITS)(void *v1, const void *v2, uint32_t desc)          \
 140{                                                                              \
 141    int i;                                                                     \
 142                                                                               \
 143    for (i = 0; i < (128 / BITS); i++) {                                       \
 144        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
 145                                                                               \
 146        s390_vec_write_element##BITS(v1, i, clz32(a) - 32 + BITS);             \
 147    }                                                                          \
 148}
 149DEF_VCLZ(8)
 150DEF_VCLZ(16)
 151
 152#define DEF_VCTZ(BITS)                                                         \
 153void HELPER(gvec_vctz##BITS)(void *v1, const void *v2, uint32_t desc)          \
 154{                                                                              \
 155    int i;                                                                     \
 156                                                                               \
 157    for (i = 0; i < (128 / BITS); i++) {                                       \
 158        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
 159                                                                               \
 160        s390_vec_write_element##BITS(v1, i, a ? ctz32(a) : BITS);              \
 161    }                                                                          \
 162}
 163DEF_VCTZ(8)
 164DEF_VCTZ(16)
 165
 166/* like binary multiplication, but XOR instead of addition */
 167#define DEF_GALOIS_MULTIPLY(BITS, TBITS)                                       \
 168static uint##TBITS##_t galois_multiply##BITS(uint##TBITS##_t a,                \
 169                                             uint##TBITS##_t b)                \
 170{                                                                              \
 171    uint##TBITS##_t res = 0;                                                   \
 172                                                                               \
 173    while (b) {                                                                \
 174        if (b & 0x1) {                                                         \
 175            res = res ^ a;                                                     \
 176        }                                                                      \
 177        a = a << 1;                                                            \
 178        b = b >> 1;                                                            \
 179    }                                                                          \
 180    return res;                                                                \
 181}
 182DEF_GALOIS_MULTIPLY(8, 16)
 183DEF_GALOIS_MULTIPLY(16, 32)
 184DEF_GALOIS_MULTIPLY(32, 64)
 185
 186static S390Vector galois_multiply64(uint64_t a, uint64_t b)
 187{
 188    S390Vector res = {};
 189    S390Vector va = {
 190        .doubleword[1] = a,
 191    };
 192    S390Vector vb = {
 193        .doubleword[1] = b,
 194    };
 195
 196    while (!s390_vec_is_zero(&vb)) {
 197        if (vb.doubleword[1] & 0x1) {
 198            s390_vec_xor(&res, &res, &va);
 199        }
 200        s390_vec_shl(&va, &va, 1);
 201        s390_vec_shr(&vb, &vb, 1);
 202    }
 203    return res;
 204}
 205
 206#define DEF_VGFM(BITS, TBITS)                                                  \
 207void HELPER(gvec_vgfm##BITS)(void *v1, const void *v2, const void *v3,         \
 208                             uint32_t desc)                                    \
 209{                                                                              \
 210    int i;                                                                     \
 211                                                                               \
 212    for (i = 0; i < (128 / TBITS); i++) {                                      \
 213        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
 214        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
 215        uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
 216                                                                               \
 217        a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
 218        b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
 219        d = d ^ galois_multiply32(a, b);                                       \
 220        s390_vec_write_element##TBITS(v1, i, d);                               \
 221    }                                                                          \
 222}
 223DEF_VGFM(8, 16)
 224DEF_VGFM(16, 32)
 225DEF_VGFM(32, 64)
 226
 227void HELPER(gvec_vgfm64)(void *v1, const void *v2, const void *v3,
 228                         uint32_t desc)
 229{
 230    S390Vector tmp1, tmp2;
 231    uint64_t a, b;
 232
 233    a = s390_vec_read_element64(v2, 0);
 234    b = s390_vec_read_element64(v3, 0);
 235    tmp1 = galois_multiply64(a, b);
 236    a = s390_vec_read_element64(v2, 1);
 237    b = s390_vec_read_element64(v3, 1);
 238    tmp2 = galois_multiply64(a, b);
 239    s390_vec_xor(v1, &tmp1, &tmp2);
 240}
 241
 242#define DEF_VGFMA(BITS, TBITS)                                                 \
 243void HELPER(gvec_vgfma##BITS)(void *v1, const void *v2, const void *v3,        \
 244                              const void *v4, uint32_t desc)                   \
 245{                                                                              \
 246    int i;                                                                     \
 247                                                                               \
 248    for (i = 0; i < (128 / TBITS); i++) {                                      \
 249        uint##BITS##_t a = s390_vec_read_element##BITS(v2, i * 2);             \
 250        uint##BITS##_t b = s390_vec_read_element##BITS(v3, i * 2);             \
 251        uint##TBITS##_t d = galois_multiply##BITS(a, b);                       \
 252                                                                               \
 253        a = s390_vec_read_element##BITS(v2, i * 2 + 1);                        \
 254        b = s390_vec_read_element##BITS(v3, i * 2 + 1);                        \
 255        d = d ^ galois_multiply32(a, b);                                       \
 256        d = d ^ s390_vec_read_element##TBITS(v4, i);                           \
 257        s390_vec_write_element##TBITS(v1, i, d);                               \
 258    }                                                                          \
 259}
 260DEF_VGFMA(8, 16)
 261DEF_VGFMA(16, 32)
 262DEF_VGFMA(32, 64)
 263
 264void HELPER(gvec_vgfma64)(void *v1, const void *v2, const void *v3,
 265                          const void *v4, uint32_t desc)
 266{
 267    S390Vector tmp1, tmp2;
 268    uint64_t a, b;
 269
 270    a = s390_vec_read_element64(v2, 0);
 271    b = s390_vec_read_element64(v3, 0);
 272    tmp1 = galois_multiply64(a, b);
 273    a = s390_vec_read_element64(v2, 1);
 274    b = s390_vec_read_element64(v3, 1);
 275    tmp2 = galois_multiply64(a, b);
 276    s390_vec_xor(&tmp1, &tmp1, &tmp2);
 277    s390_vec_xor(v1, &tmp1, v4);
 278}
 279
 280#define DEF_VMAL(BITS)                                                         \
 281void HELPER(gvec_vmal##BITS)(void *v1, const void *v2, const void *v3,         \
 282                             const void *v4, uint32_t desc)                    \
 283{                                                                              \
 284    int i;                                                                     \
 285                                                                               \
 286    for (i = 0; i < (128 / BITS); i++) {                                       \
 287        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
 288        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
 289        const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
 290                                                                               \
 291        s390_vec_write_element##BITS(v1, i, a * b + c);                        \
 292    }                                                                          \
 293}
 294DEF_VMAL(8)
 295DEF_VMAL(16)
 296
 297#define DEF_VMAH(BITS)                                                         \
 298void HELPER(gvec_vmah##BITS)(void *v1, const void *v2, const void *v3,         \
 299                             const void *v4, uint32_t desc)                    \
 300{                                                                              \
 301    int i;                                                                     \
 302                                                                               \
 303    for (i = 0; i < (128 / BITS); i++) {                                       \
 304        const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
 305        const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
 306        const int32_t c = (int##BITS##_t)s390_vec_read_element##BITS(v4, i);   \
 307                                                                               \
 308        s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
 309    }                                                                          \
 310}
 311DEF_VMAH(8)
 312DEF_VMAH(16)
 313
 314#define DEF_VMALH(BITS)                                                        \
 315void HELPER(gvec_vmalh##BITS)(void *v1, const void *v2, const void *v3,        \
 316                              const void *v4, uint32_t desc)                   \
 317{                                                                              \
 318    int i;                                                                     \
 319                                                                               \
 320    for (i = 0; i < (128 / BITS); i++) {                                       \
 321        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
 322        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
 323        const uint##BITS##_t c = s390_vec_read_element##BITS(v4, i);           \
 324                                                                               \
 325        s390_vec_write_element##BITS(v1, i, (a * b + c) >> BITS);              \
 326    }                                                                          \
 327}
 328DEF_VMALH(8)
 329DEF_VMALH(16)
 330
 331#define DEF_VMAE(BITS, TBITS)                                                  \
 332void HELPER(gvec_vmae##BITS)(void *v1, const void *v2, const void *v3,         \
 333                             const void *v4, uint32_t desc)                    \
 334{                                                                              \
 335    int i, j;                                                                  \
 336                                                                               \
 337    for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
 338        int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
 339        int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
 340        int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
 341                                                                               \
 342        s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
 343    }                                                                          \
 344}
 345DEF_VMAE(8, 16)
 346DEF_VMAE(16, 32)
 347DEF_VMAE(32, 64)
 348
 349#define DEF_VMALE(BITS, TBITS)                                                 \
 350void HELPER(gvec_vmale##BITS)(void *v1, const void *v2, const void *v3,        \
 351                              const void *v4, uint32_t desc)                   \
 352{                                                                              \
 353    int i, j;                                                                  \
 354                                                                               \
 355    for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
 356        uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
 357        uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
 358        uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
 359                                                                               \
 360        s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
 361    }                                                                          \
 362}
 363DEF_VMALE(8, 16)
 364DEF_VMALE(16, 32)
 365DEF_VMALE(32, 64)
 366
 367#define DEF_VMAO(BITS, TBITS)                                                  \
 368void HELPER(gvec_vmao##BITS)(void *v1, const void *v2, const void *v3,         \
 369                             const void *v4, uint32_t desc)                    \
 370{                                                                              \
 371    int i, j;                                                                  \
 372                                                                               \
 373    for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
 374        int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
 375        int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
 376        int##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);                \
 377                                                                               \
 378        s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
 379    }                                                                          \
 380}
 381DEF_VMAO(8, 16)
 382DEF_VMAO(16, 32)
 383DEF_VMAO(32, 64)
 384
 385#define DEF_VMALO(BITS, TBITS)                                                 \
 386void HELPER(gvec_vmalo##BITS)(void *v1, const void *v2, const void *v3,        \
 387                              const void *v4, uint32_t desc)                   \
 388{                                                                              \
 389    int i, j;                                                                  \
 390                                                                               \
 391    for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
 392        uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);                \
 393        uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);                \
 394        uint##TBITS##_t c = s390_vec_read_element##TBITS(v4, i);               \
 395                                                                               \
 396        s390_vec_write_element##TBITS(v1, i, a * b + c);                       \
 397    }                                                                          \
 398}
 399DEF_VMALO(8, 16)
 400DEF_VMALO(16, 32)
 401DEF_VMALO(32, 64)
 402
 403#define DEF_VMH(BITS)                                                          \
 404void HELPER(gvec_vmh##BITS)(void *v1, const void *v2, const void *v3,          \
 405                            uint32_t desc)                                     \
 406{                                                                              \
 407    int i;                                                                     \
 408                                                                               \
 409    for (i = 0; i < (128 / BITS); i++) {                                       \
 410        const int32_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, i);   \
 411        const int32_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, i);   \
 412                                                                               \
 413        s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
 414    }                                                                          \
 415}
 416DEF_VMH(8)
 417DEF_VMH(16)
 418
 419#define DEF_VMLH(BITS)                                                         \
 420void HELPER(gvec_vmlh##BITS)(void *v1, const void *v2, const void *v3,         \
 421                             uint32_t desc)                                    \
 422{                                                                              \
 423    int i;                                                                     \
 424                                                                               \
 425    for (i = 0; i < (128 / BITS); i++) {                                       \
 426        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
 427        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
 428                                                                               \
 429        s390_vec_write_element##BITS(v1, i, (a * b) >> BITS);                  \
 430    }                                                                          \
 431}
 432DEF_VMLH(8)
 433DEF_VMLH(16)
 434
 435#define DEF_VME(BITS, TBITS)                                                   \
 436void HELPER(gvec_vme##BITS)(void *v1, const void *v2, const void *v3,          \
 437                            uint32_t desc)                                     \
 438{                                                                              \
 439    int i, j;                                                                  \
 440                                                                               \
 441    for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
 442        int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
 443        int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
 444                                                                               \
 445        s390_vec_write_element##TBITS(v1, i, a * b);                           \
 446    }                                                                          \
 447}
 448DEF_VME(8, 16)
 449DEF_VME(16, 32)
 450DEF_VME(32, 64)
 451
 452#define DEF_VMLE(BITS, TBITS)                                                  \
 453void HELPER(gvec_vmle##BITS)(void *v1, const void *v2, const void *v3,         \
 454                             uint32_t desc)                                    \
 455{                                                                              \
 456    int i, j;                                                                  \
 457                                                                               \
 458    for (i = 0, j = 0; i < (128 / TBITS); i++, j += 2) {                       \
 459        const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
 460        const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
 461                                                                               \
 462        s390_vec_write_element##TBITS(v1, i, a * b);                           \
 463    }                                                                          \
 464}
 465DEF_VMLE(8, 16)
 466DEF_VMLE(16, 32)
 467DEF_VMLE(32, 64)
 468
 469#define DEF_VMO(BITS, TBITS)                                                   \
 470void HELPER(gvec_vmo##BITS)(void *v1, const void *v2, const void *v3,          \
 471                            uint32_t desc)                                     \
 472{                                                                              \
 473    int i, j;                                                                  \
 474                                                                               \
 475    for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
 476        int##TBITS##_t a = (int##BITS##_t)s390_vec_read_element##BITS(v2, j);  \
 477        int##TBITS##_t b = (int##BITS##_t)s390_vec_read_element##BITS(v3, j);  \
 478                                                                               \
 479        s390_vec_write_element##TBITS(v1, i, a * b);                           \
 480    }                                                                          \
 481}
 482DEF_VMO(8, 16)
 483DEF_VMO(16, 32)
 484DEF_VMO(32, 64)
 485
 486#define DEF_VMLO(BITS, TBITS)                                                  \
 487void HELPER(gvec_vmlo##BITS)(void *v1, const void *v2, const void *v3,         \
 488                             uint32_t desc)                                    \
 489{                                                                              \
 490    int i, j;                                                                  \
 491                                                                               \
 492    for (i = 0, j = 1; i < (128 / TBITS); i++, j += 2) {                       \
 493        const uint##TBITS##_t a = s390_vec_read_element##BITS(v2, j);          \
 494        const uint##TBITS##_t b = s390_vec_read_element##BITS(v3, j);          \
 495                                                                               \
 496        s390_vec_write_element##TBITS(v1, i, a * b);                           \
 497    }                                                                          \
 498}
 499DEF_VMLO(8, 16)
 500DEF_VMLO(16, 32)
 501DEF_VMLO(32, 64)
 502
 503#define DEF_VPOPCT(BITS)                                                       \
 504void HELPER(gvec_vpopct##BITS)(void *v1, const void *v2, uint32_t desc)        \
 505{                                                                              \
 506    int i;                                                                     \
 507                                                                               \
 508    for (i = 0; i < (128 / BITS); i++) {                                       \
 509        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
 510                                                                               \
 511        s390_vec_write_element##BITS(v1, i, ctpop32(a));                       \
 512    }                                                                          \
 513}
 514DEF_VPOPCT(8)
 515DEF_VPOPCT(16)
 516
 517#define DEF_VERIM(BITS)                                                        \
 518void HELPER(gvec_verim##BITS)(void *v1, const void *v2, const void *v3,        \
 519                              uint32_t desc)                                   \
 520{                                                                              \
 521    const uint8_t count = simd_data(desc);                                     \
 522    int i;                                                                     \
 523                                                                               \
 524    for (i = 0; i < (128 / BITS); i++) {                                       \
 525        const uint##BITS##_t a = s390_vec_read_element##BITS(v1, i);           \
 526        const uint##BITS##_t b = s390_vec_read_element##BITS(v2, i);           \
 527        const uint##BITS##_t mask = s390_vec_read_element##BITS(v3, i);        \
 528        const uint##BITS##_t d = (a & ~mask) | (rol##BITS(b, count) & mask);   \
 529                                                                               \
 530        s390_vec_write_element##BITS(v1, i, d);                                \
 531    }                                                                          \
 532}
 533DEF_VERIM(8)
 534DEF_VERIM(16)
 535
 536void HELPER(gvec_vsl)(void *v1, const void *v2, uint64_t count,
 537                      uint32_t desc)
 538{
 539    s390_vec_shl(v1, v2, count);
 540}
 541
 542void HELPER(gvec_vsl_ve2)(void *v1, const void *v2, const void *v3,
 543                          uint32_t desc)
 544{
 545    S390Vector tmp;
 546    uint32_t sh, e0, e1 = 0;
 547    int i;
 548
 549    for (i = 15; i >= 0; --i, e1 = e0) {
 550        e0 = s390_vec_read_element8(v2, i);
 551        sh = s390_vec_read_element8(v3, i) & 7;
 552
 553        s390_vec_write_element8(&tmp, i, rol32(e0 | (e1 << 24), sh));
 554    }
 555
 556    *(S390Vector *)v1 = tmp;
 557}
 558
 559void HELPER(gvec_vsra)(void *v1, const void *v2, uint64_t count,
 560                       uint32_t desc)
 561{
 562    s390_vec_sar(v1, v2, count);
 563}
 564
 565void HELPER(gvec_vsra_ve2)(void *v1, const void *v2, const void *v3,
 566                           uint32_t desc)
 567{
 568    S390Vector tmp;
 569    uint32_t sh, e0, e1 = 0;
 570    int i = 0;
 571
 572    /* Byte 0 is special only. */
 573    e0 = (int32_t)(int8_t)s390_vec_read_element8(v2, i);
 574    sh = s390_vec_read_element8(v3, i) & 7;
 575    s390_vec_write_element8(&tmp, i, e0 >> sh);
 576
 577    e1 = e0;
 578    for (i = 1; i < 16; ++i, e1 = e0) {
 579        e0 = s390_vec_read_element8(v2, i);
 580        sh = s390_vec_read_element8(v3, i) & 7;
 581        s390_vec_write_element8(&tmp, i, (e0 | e1 << 8) >> sh);
 582    }
 583
 584    *(S390Vector *)v1 = tmp;
 585}
 586
 587void HELPER(gvec_vsrl)(void *v1, const void *v2, uint64_t count,
 588                       uint32_t desc)
 589{
 590    s390_vec_shr(v1, v2, count);
 591}
 592
 593void HELPER(gvec_vsrl_ve2)(void *v1, const void *v2, const void *v3,
 594                           uint32_t desc)
 595{
 596    S390Vector tmp;
 597    uint32_t sh, e0, e1 = 0;
 598
 599    for (int i = 0; i < 16; ++i, e1 = e0) {
 600        e0 = s390_vec_read_element8(v2, i);
 601        sh = s390_vec_read_element8(v3, i) & 7;
 602
 603        s390_vec_write_element8(&tmp, i, (e0 | (e1 << 8)) >> sh);
 604    }
 605
 606    *(S390Vector *)v1 = tmp;
 607}
 608
 609#define DEF_VSCBI(BITS)                                                        \
 610void HELPER(gvec_vscbi##BITS)(void *v1, const void *v2, const void *v3,        \
 611                              uint32_t desc)                                   \
 612{                                                                              \
 613    int i;                                                                     \
 614                                                                               \
 615    for (i = 0; i < (128 / BITS); i++) {                                       \
 616        const uint##BITS##_t a = s390_vec_read_element##BITS(v2, i);           \
 617        const uint##BITS##_t b = s390_vec_read_element##BITS(v3, i);           \
 618                                                                               \
 619        s390_vec_write_element##BITS(v1, i, a >= b);                           \
 620    }                                                                          \
 621}
 622DEF_VSCBI(8)
 623DEF_VSCBI(16)
 624
 625void HELPER(gvec_vtm)(void *v1, const void *v2, CPUS390XState *env,
 626                      uint32_t desc)
 627{
 628    S390Vector tmp;
 629
 630    s390_vec_and(&tmp, v1, v2);
 631    if (s390_vec_is_zero(&tmp)) {
 632        /* Selected bits all zeros; or all mask bits zero */
 633        env->cc_op = 0;
 634    } else if (s390_vec_equal(&tmp, v2)) {
 635        /* Selected bits all ones */
 636        env->cc_op = 3;
 637    } else {
 638        /* Selected bits a mix of zeros and ones */
 639        env->cc_op = 1;
 640    }
 641}
 642