qemu/target/s390x/tcg/vec_string_helper.c
<<
>>
Prefs
/*
 * QEMU TCG support -- s390x vector string instruction support
 *
 * Copyright (C) 2019 Red Hat Inc
 *
 * Authors:
 *   David Hildenbrand <david@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
  12#include "qemu/osdep.h"
  13#include "qemu-common.h"
  14#include "cpu.h"
  15#include "s390x-internal.h"
  16#include "vec.h"
  17#include "tcg/tcg.h"
  18#include "tcg/tcg-gvec-desc.h"
  19#include "exec/helper-proto.h"
  20
  21/*
  22 * Returns a bit set in the MSB of each element that is zero,
  23 * as defined by the mask.
  24 */
  25static inline uint64_t zero_search(uint64_t a, uint64_t mask)
  26{
  27    return ~(((a & mask) + mask) | a | mask);
  28}
  29
  30/*
  31 * Returns a bit set in the MSB of each element that is not zero,
  32 * as defined by the mask.
  33 */
  34static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
  35{
  36    return (((a & mask) + mask) | a) & ~mask;
  37}
  38
  39/*
  40 * Returns the byte offset for the first match, or 16 for no match.
  41 */
  42static inline int match_index(uint64_t c0, uint64_t c1)
  43{
  44    return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3;
  45}
  46
  47/*
  48 * Returns the number of bits composing one element.
  49 */
  50static uint8_t get_element_bits(uint8_t es)
  51{
  52    return (1 << es) * BITS_PER_BYTE;
  53}
  54
  55/*
  56 * Returns the bitmask for a single element.
  57 */
  58static uint64_t get_single_element_mask(uint8_t es)
  59{
  60    return -1ull >> (64 - get_element_bits(es));
  61}
  62
  63/*
  64 * Returns the bitmask for a single element (excluding the MSB).
  65 */
  66static uint64_t get_single_element_lsbs_mask(uint8_t es)
  67{
  68    return -1ull >> (65 - get_element_bits(es));
  69}
  70
  71/*
  72 * Returns the bitmasks for multiple elements (excluding the MSBs).
  73 */
  74static uint64_t get_element_lsbs_mask(uint8_t es)
  75{
  76    return dup_const(es, get_single_element_lsbs_mask(es));
  77}
  78
  79static int vfae(void *v1, const void *v2, const void *v3, bool in,
  80                bool rt, bool zs, uint8_t es)
  81{
  82    const uint64_t mask = get_element_lsbs_mask(es);
  83    const int bits = get_element_bits(es);
  84    uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1;
  85    uint64_t first_zero = 16;
  86    uint64_t first_equal;
  87    int i;
  88
  89    a0 = s390_vec_read_element64(v2, 0);
  90    a1 = s390_vec_read_element64(v2, 1);
  91    b0 = s390_vec_read_element64(v3, 0);
  92    b1 = s390_vec_read_element64(v3, 1);
  93    e0 = 0;
  94    e1 = 0;
  95    /* compare against equality with every other element */
  96    for (i = 0; i < 64; i += bits) {
  97        t0 = rol64(b0, i);
  98        t1 = rol64(b1, i);
  99        e0 |= zero_search(a0 ^ t0, mask);
 100        e0 |= zero_search(a0 ^ t1, mask);
 101        e1 |= zero_search(a1 ^ t0, mask);
 102        e1 |= zero_search(a1 ^ t1, mask);
 103    }
 104    /* invert the result if requested - invert only the MSBs */
 105    if (in) {
 106        e0 = ~e0 & ~mask;
 107        e1 = ~e1 & ~mask;
 108    }
 109    first_equal = match_index(e0, e1);
 110
 111    if (zs) {
 112        z0 = zero_search(a0, mask);
 113        z1 = zero_search(a1, mask);
 114        first_zero = match_index(z0, z1);
 115    }
 116
 117    if (rt) {
 118        e0 = (e0 >> (bits - 1)) * get_single_element_mask(es);
 119        e1 = (e1 >> (bits - 1)) * get_single_element_mask(es);
 120        s390_vec_write_element64(v1, 0, e0);
 121        s390_vec_write_element64(v1, 1, e1);
 122    } else {
 123        s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
 124        s390_vec_write_element64(v1, 1, 0);
 125    }
 126
 127    if (first_zero == 16 && first_equal == 16) {
 128        return 3; /* no match */
 129    } else if (first_zero == 16) {
 130        return 1; /* matching elements, no match for zero */
 131    } else if (first_equal < first_zero) {
 132        return 2; /* matching elements before match for zero */
 133    }
 134    return 0; /* match for zero */
 135}
 136
 137#define DEF_VFAE_HELPER(BITS)                                                  \
 138void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3,         \
 139                             uint32_t desc)                                    \
 140{                                                                              \
 141    const bool in = extract32(simd_data(desc), 3, 1);                          \
 142    const bool rt = extract32(simd_data(desc), 2, 1);                          \
 143    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 144                                                                               \
 145    vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                                   \
 146}
 147DEF_VFAE_HELPER(8)
 148DEF_VFAE_HELPER(16)
 149DEF_VFAE_HELPER(32)
 150
 151#define DEF_VFAE_CC_HELPER(BITS)                                               \
 152void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3,      \
 153                                CPUS390XState *env, uint32_t desc)             \
 154{                                                                              \
 155    const bool in = extract32(simd_data(desc), 3, 1);                          \
 156    const bool rt = extract32(simd_data(desc), 2, 1);                          \
 157    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 158                                                                               \
 159    env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                      \
 160}
 161DEF_VFAE_CC_HELPER(8)
 162DEF_VFAE_CC_HELPER(16)
 163DEF_VFAE_CC_HELPER(32)
 164
 165static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
 166{
 167    const uint64_t mask = get_element_lsbs_mask(es);
 168    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
 169    uint64_t first_zero = 16;
 170    uint64_t first_equal;
 171
 172    a0 = s390_vec_read_element64(v2, 0);
 173    a1 = s390_vec_read_element64(v2, 1);
 174    b0 = s390_vec_read_element64(v3, 0);
 175    b1 = s390_vec_read_element64(v3, 1);
 176    e0 = zero_search(a0 ^ b0, mask);
 177    e1 = zero_search(a1 ^ b1, mask);
 178    first_equal = match_index(e0, e1);
 179
 180    if (zs) {
 181        z0 = zero_search(a0, mask);
 182        z1 = zero_search(a1, mask);
 183        first_zero = match_index(z0, z1);
 184    }
 185
 186    s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
 187    s390_vec_write_element64(v1, 1, 0);
 188    if (first_zero == 16 && first_equal == 16) {
 189        return 3; /* no match */
 190    } else if (first_zero == 16) {
 191        return 1; /* matching elements, no match for zero */
 192    } else if (first_equal < first_zero) {
 193        return 2; /* matching elements before match for zero */
 194    }
 195    return 0; /* match for zero */
 196}
 197
 198#define DEF_VFEE_HELPER(BITS)                                                  \
 199void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3,         \
 200                             uint32_t desc)                                    \
 201{                                                                              \
 202    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 203                                                                               \
 204    vfee(v1, v2, v3, zs, MO_##BITS);                                           \
 205}
 206DEF_VFEE_HELPER(8)
 207DEF_VFEE_HELPER(16)
 208DEF_VFEE_HELPER(32)
 209
 210#define DEF_VFEE_CC_HELPER(BITS)                                               \
 211void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3,      \
 212                                CPUS390XState *env, uint32_t desc)             \
 213{                                                                              \
 214    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 215                                                                               \
 216    env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS);                              \
 217}
 218DEF_VFEE_CC_HELPER(8)
 219DEF_VFEE_CC_HELPER(16)
 220DEF_VFEE_CC_HELPER(32)
 221
 222static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
 223{
 224    const uint64_t mask = get_element_lsbs_mask(es);
 225    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
 226    uint64_t first_zero = 16;
 227    uint64_t first_inequal;
 228    bool smaller = false;
 229
 230    a0 = s390_vec_read_element64(v2, 0);
 231    a1 = s390_vec_read_element64(v2, 1);
 232    b0 = s390_vec_read_element64(v3, 0);
 233    b1 = s390_vec_read_element64(v3, 1);
 234    e0 = nonzero_search(a0 ^ b0, mask);
 235    e1 = nonzero_search(a1 ^ b1, mask);
 236    first_inequal = match_index(e0, e1);
 237
 238    /* identify the smaller element */
 239    if (first_inequal < 16) {
 240        uint8_t enr = first_inequal / (1 << es);
 241        uint32_t a = s390_vec_read_element(v2, enr, es);
 242        uint32_t b = s390_vec_read_element(v3, enr, es);
 243
 244        smaller = a < b;
 245    }
 246
 247    if (zs) {
 248        z0 = zero_search(a0, mask);
 249        z1 = zero_search(a1, mask);
 250        first_zero = match_index(z0, z1);
 251    }
 252
 253    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
 254    s390_vec_write_element64(v1, 1, 0);
 255    if (first_zero == 16 && first_inequal == 16) {
 256        return 3;
 257    } else if (first_zero < first_inequal) {
 258        return 0;
 259    }
 260    return smaller ? 1 : 2;
 261}
 262
 263#define DEF_VFENE_HELPER(BITS)                                                 \
 264void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3,        \
 265                              uint32_t desc)                                   \
 266{                                                                              \
 267    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 268                                                                               \
 269    vfene(v1, v2, v3, zs, MO_##BITS);                                          \
 270}
 271DEF_VFENE_HELPER(8)
 272DEF_VFENE_HELPER(16)
 273DEF_VFENE_HELPER(32)
 274
 275#define DEF_VFENE_CC_HELPER(BITS)                                              \
 276void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3,     \
 277                                 CPUS390XState *env, uint32_t desc)            \
 278{                                                                              \
 279    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 280                                                                               \
 281    env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS);                             \
 282}
 283DEF_VFENE_CC_HELPER(8)
 284DEF_VFENE_CC_HELPER(16)
 285DEF_VFENE_CC_HELPER(32)
 286
 287static int vistr(void *v1, const void *v2, uint8_t es)
 288{
 289    const uint64_t mask = get_element_lsbs_mask(es);
 290    uint64_t a0 = s390_vec_read_element64(v2, 0);
 291    uint64_t a1 = s390_vec_read_element64(v2, 1);
 292    uint64_t z;
 293    int cc = 3;
 294
 295    z = zero_search(a0, mask);
 296    if (z) {
 297        a0 &= ~(-1ull >> clz64(z));
 298        a1 = 0;
 299        cc = 0;
 300    } else {
 301        z = zero_search(a1, mask);
 302        if (z) {
 303            a1 &= ~(-1ull >> clz64(z));
 304            cc = 0;
 305        }
 306    }
 307
 308    s390_vec_write_element64(v1, 0, a0);
 309    s390_vec_write_element64(v1, 1, a1);
 310    return cc;
 311}
 312
 313#define DEF_VISTR_HELPER(BITS)                                                 \
 314void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc)         \
 315{                                                                              \
 316    vistr(v1, v2, MO_##BITS);                                                  \
 317}
 318DEF_VISTR_HELPER(8)
 319DEF_VISTR_HELPER(16)
 320DEF_VISTR_HELPER(32)
 321
 322#define DEF_VISTR_CC_HELPER(BITS)                                              \
 323void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
 324                                uint32_t desc)                                 \
 325{                                                                              \
 326    env->cc_op = vistr(v1, v2, MO_##BITS);                                     \
 327}
 328DEF_VISTR_CC_HELPER(8)
 329DEF_VISTR_CC_HELPER(16)
 330DEF_VISTR_CC_HELPER(32)
 331
 332static bool element_compare(uint32_t data, uint32_t l, uint8_t c)
 333{
 334    const bool equal = extract32(c, 7, 1);
 335    const bool lower = extract32(c, 6, 1);
 336    const bool higher = extract32(c, 5, 1);
 337
 338    if (data < l) {
 339        return lower;
 340    } else if (data > l) {
 341        return higher;
 342    }
 343    return equal;
 344}
 345
 346static int vstrc(void *v1, const void *v2, const void *v3, const void *v4,
 347                 bool in, bool rt, bool zs, uint8_t es)
 348{
 349    const uint64_t mask = get_element_lsbs_mask(es);
 350    uint64_t a0 = s390_vec_read_element64(v2, 0);
 351    uint64_t a1 = s390_vec_read_element64(v2, 1);
 352    int first_zero = 16, first_match = 16;
 353    S390Vector rt_result = {};
 354    uint64_t z0, z1;
 355    int i, j;
 356
 357    if (zs) {
 358        z0 = zero_search(a0, mask);
 359        z1 = zero_search(a1, mask);
 360        first_zero = match_index(z0, z1);
 361    }
 362
 363    for (i = 0; i < 16 / (1 << es); i++) {
 364        const uint32_t data = s390_vec_read_element(v2, i, es);
 365        const int cur_byte = i * (1 << es);
 366        bool any_match = false;
 367
 368        /* if we don't need a bit vector, we can stop early */
 369        if (cur_byte == first_zero && !rt) {
 370            break;
 371        }
 372
 373        for (j = 0; j < 16 / (1 << es); j += 2) {
 374            const uint32_t l1 = s390_vec_read_element(v3, j, es);
 375            const uint32_t l2 = s390_vec_read_element(v3, j + 1, es);
 376            /* we are only interested in the highest byte of each element */
 377            const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es));
 378            const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es));
 379
 380            if (element_compare(data, l1, c1) &&
 381                element_compare(data, l2, c2)) {
 382                any_match = true;
 383                break;
 384            }
 385        }
 386        /* invert the result if requested */
 387        any_match = in ^ any_match;
 388
 389        if (any_match) {
 390            /* indicate bit vector if requested */
 391            if (rt) {
 392                const uint64_t val = -1ull;
 393
 394                first_match = MIN(cur_byte, first_match);
 395                s390_vec_write_element(&rt_result, i, es, val);
 396            } else {
 397                /* stop on the first match */
 398                first_match = cur_byte;
 399                break;
 400            }
 401        }
 402    }
 403
 404    if (rt) {
 405        *(S390Vector *)v1 = rt_result;
 406    } else {
 407        s390_vec_write_element64(v1, 0, MIN(first_match, first_zero));
 408        s390_vec_write_element64(v1, 1, 0);
 409    }
 410
 411    if (first_zero == 16 && first_match == 16) {
 412        return 3; /* no match */
 413    } else if (first_zero == 16) {
 414        return 1; /* matching elements, no match for zero */
 415    } else if (first_match < first_zero) {
 416        return 2; /* matching elements before match for zero */
 417    }
 418    return 0; /* match for zero */
 419}
 420
 421#define DEF_VSTRC_HELPER(BITS)                                                 \
 422void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3,        \
 423                              const void *v4, uint32_t desc)                   \
 424{                                                                              \
 425    const bool in = extract32(simd_data(desc), 3, 1);                          \
 426    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 427                                                                               \
 428    vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                               \
 429}
 430DEF_VSTRC_HELPER(8)
 431DEF_VSTRC_HELPER(16)
 432DEF_VSTRC_HELPER(32)
 433
 434#define DEF_VSTRC_RT_HELPER(BITS)                                              \
 435void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3,     \
 436                                 const void *v4, uint32_t desc)                \
 437{                                                                              \
 438    const bool in = extract32(simd_data(desc), 3, 1);                          \
 439    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 440                                                                               \
 441    vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                               \
 442}
 443DEF_VSTRC_RT_HELPER(8)
 444DEF_VSTRC_RT_HELPER(16)
 445DEF_VSTRC_RT_HELPER(32)
 446
 447#define DEF_VSTRC_CC_HELPER(BITS)                                              \
 448void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3,     \
 449                                 const void *v4, CPUS390XState *env,           \
 450                                 uint32_t desc)                                \
 451{                                                                              \
 452    const bool in = extract32(simd_data(desc), 3, 1);                          \
 453    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 454                                                                               \
 455    env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                  \
 456}
 457DEF_VSTRC_CC_HELPER(8)
 458DEF_VSTRC_CC_HELPER(16)
 459DEF_VSTRC_CC_HELPER(32)
 460
 461#define DEF_VSTRC_CC_RT_HELPER(BITS)                                           \
 462void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3,  \
 463                                    const void *v4, CPUS390XState *env,        \
 464                                    uint32_t desc)                             \
 465{                                                                              \
 466    const bool in = extract32(simd_data(desc), 3, 1);                          \
 467    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 468                                                                               \
 469    env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                  \
 470}
 471DEF_VSTRC_CC_RT_HELPER(8)
 472DEF_VSTRC_CC_RT_HELPER(16)
 473DEF_VSTRC_CC_RT_HELPER(32)
 474