qemu/target/s390x/tcg/vec_string_helper.c
<<
>>
Prefs
   1/*
   2 * QEMU TCG support -- s390x vector string instruction support
   3 *
   4 * Copyright (C) 2019 Red Hat Inc
   5 *
   6 * Authors:
   7 *   David Hildenbrand <david@redhat.com>
   8 *
   9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10 * See the COPYING file in the top-level directory.
  11 */
  12#include "qemu/osdep.h"
  13#include "cpu.h"
  14#include "s390x-internal.h"
  15#include "vec.h"
  16#include "tcg/tcg.h"
  17#include "tcg/tcg-gvec-desc.h"
  18#include "exec/helper-proto.h"
  19
  20/*
  21 * Returns a bit set in the MSB of each element that is zero,
  22 * as defined by the mask.
  23 */
  24static inline uint64_t zero_search(uint64_t a, uint64_t mask)
  25{
  26    return ~(((a & mask) + mask) | a | mask);
  27}
  28
  29/*
  30 * Returns a bit set in the MSB of each element that is not zero,
  31 * as defined by the mask.
  32 */
  33static inline uint64_t nonzero_search(uint64_t a, uint64_t mask)
  34{
  35    return (((a & mask) + mask) | a) & ~mask;
  36}
  37
  38/*
  39 * Returns the byte offset for the first match, or 16 for no match.
  40 */
  41static inline int match_index(uint64_t c0, uint64_t c1)
  42{
  43    return (c0 ? clz64(c0) : clz64(c1) + 64) >> 3;
  44}
  45
  46/*
  47 * Returns the number of bits composing one element.
  48 */
  49static uint8_t get_element_bits(uint8_t es)
  50{
  51    return (1 << es) * BITS_PER_BYTE;
  52}
  53
  54/*
  55 * Returns the bitmask for a single element.
  56 */
  57static uint64_t get_single_element_mask(uint8_t es)
  58{
  59    return -1ull >> (64 - get_element_bits(es));
  60}
  61
  62/*
  63 * Returns the bitmask for a single element (excluding the MSB).
  64 */
  65static uint64_t get_single_element_lsbs_mask(uint8_t es)
  66{
  67    return -1ull >> (65 - get_element_bits(es));
  68}
  69
  70/*
  71 * Returns the bitmasks for multiple elements (excluding the MSBs).
  72 */
  73static uint64_t get_element_lsbs_mask(uint8_t es)
  74{
  75    return dup_const(es, get_single_element_lsbs_mask(es));
  76}
  77
  78static int vfae(void *v1, const void *v2, const void *v3, bool in,
  79                bool rt, bool zs, uint8_t es)
  80{
  81    const uint64_t mask = get_element_lsbs_mask(es);
  82    const int bits = get_element_bits(es);
  83    uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1;
  84    uint64_t first_zero = 16;
  85    uint64_t first_equal;
  86    int i;
  87
  88    a0 = s390_vec_read_element64(v2, 0);
  89    a1 = s390_vec_read_element64(v2, 1);
  90    b0 = s390_vec_read_element64(v3, 0);
  91    b1 = s390_vec_read_element64(v3, 1);
  92    e0 = 0;
  93    e1 = 0;
  94    /* compare against equality with every other element */
  95    for (i = 0; i < 64; i += bits) {
  96        t0 = rol64(b0, i);
  97        t1 = rol64(b1, i);
  98        e0 |= zero_search(a0 ^ t0, mask);
  99        e0 |= zero_search(a0 ^ t1, mask);
 100        e1 |= zero_search(a1 ^ t0, mask);
 101        e1 |= zero_search(a1 ^ t1, mask);
 102    }
 103    /* invert the result if requested - invert only the MSBs */
 104    if (in) {
 105        e0 = ~e0 & ~mask;
 106        e1 = ~e1 & ~mask;
 107    }
 108    first_equal = match_index(e0, e1);
 109
 110    if (zs) {
 111        z0 = zero_search(a0, mask);
 112        z1 = zero_search(a1, mask);
 113        first_zero = match_index(z0, z1);
 114    }
 115
 116    if (rt) {
 117        e0 = (e0 >> (bits - 1)) * get_single_element_mask(es);
 118        e1 = (e1 >> (bits - 1)) * get_single_element_mask(es);
 119        s390_vec_write_element64(v1, 0, e0);
 120        s390_vec_write_element64(v1, 1, e1);
 121    } else {
 122        s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
 123        s390_vec_write_element64(v1, 1, 0);
 124    }
 125
 126    if (first_zero == 16 && first_equal == 16) {
 127        return 3; /* no match */
 128    } else if (first_zero == 16) {
 129        return 1; /* matching elements, no match for zero */
 130    } else if (first_equal < first_zero) {
 131        return 2; /* matching elements before match for zero */
 132    }
 133    return 0; /* match for zero */
 134}
 135
 136#define DEF_VFAE_HELPER(BITS)                                                  \
 137void HELPER(gvec_vfae##BITS)(void *v1, const void *v2, const void *v3,         \
 138                             uint32_t desc)                                    \
 139{                                                                              \
 140    const bool in = extract32(simd_data(desc), 3, 1);                          \
 141    const bool rt = extract32(simd_data(desc), 2, 1);                          \
 142    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 143                                                                               \
 144    vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                                   \
 145}
 146DEF_VFAE_HELPER(8)
 147DEF_VFAE_HELPER(16)
 148DEF_VFAE_HELPER(32)
 149
 150#define DEF_VFAE_CC_HELPER(BITS)                                               \
 151void HELPER(gvec_vfae_cc##BITS)(void *v1, const void *v2, const void *v3,      \
 152                                CPUS390XState *env, uint32_t desc)             \
 153{                                                                              \
 154    const bool in = extract32(simd_data(desc), 3, 1);                          \
 155    const bool rt = extract32(simd_data(desc), 2, 1);                          \
 156    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 157                                                                               \
 158    env->cc_op = vfae(v1, v2, v3, in, rt, zs, MO_##BITS);                      \
 159}
 160DEF_VFAE_CC_HELPER(8)
 161DEF_VFAE_CC_HELPER(16)
 162DEF_VFAE_CC_HELPER(32)
 163
 164static int vfee(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
 165{
 166    const uint64_t mask = get_element_lsbs_mask(es);
 167    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
 168    uint64_t first_zero = 16;
 169    uint64_t first_equal;
 170
 171    a0 = s390_vec_read_element64(v2, 0);
 172    a1 = s390_vec_read_element64(v2, 1);
 173    b0 = s390_vec_read_element64(v3, 0);
 174    b1 = s390_vec_read_element64(v3, 1);
 175    e0 = zero_search(a0 ^ b0, mask);
 176    e1 = zero_search(a1 ^ b1, mask);
 177    first_equal = match_index(e0, e1);
 178
 179    if (zs) {
 180        z0 = zero_search(a0, mask);
 181        z1 = zero_search(a1, mask);
 182        first_zero = match_index(z0, z1);
 183    }
 184
 185    s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
 186    s390_vec_write_element64(v1, 1, 0);
 187    if (first_zero == 16 && first_equal == 16) {
 188        return 3; /* no match */
 189    } else if (first_zero == 16) {
 190        return 1; /* matching elements, no match for zero */
 191    } else if (first_equal < first_zero) {
 192        return 2; /* matching elements before match for zero */
 193    }
 194    return 0; /* match for zero */
 195}
 196
 197#define DEF_VFEE_HELPER(BITS)                                                  \
 198void HELPER(gvec_vfee##BITS)(void *v1, const void *v2, const void *v3,         \
 199                             uint32_t desc)                                    \
 200{                                                                              \
 201    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 202                                                                               \
 203    vfee(v1, v2, v3, zs, MO_##BITS);                                           \
 204}
 205DEF_VFEE_HELPER(8)
 206DEF_VFEE_HELPER(16)
 207DEF_VFEE_HELPER(32)
 208
 209#define DEF_VFEE_CC_HELPER(BITS)                                               \
 210void HELPER(gvec_vfee_cc##BITS)(void *v1, const void *v2, const void *v3,      \
 211                                CPUS390XState *env, uint32_t desc)             \
 212{                                                                              \
 213    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 214                                                                               \
 215    env->cc_op = vfee(v1, v2, v3, zs, MO_##BITS);                              \
 216}
 217DEF_VFEE_CC_HELPER(8)
 218DEF_VFEE_CC_HELPER(16)
 219DEF_VFEE_CC_HELPER(32)
 220
 221static int vfene(void *v1, const void *v2, const void *v3, bool zs, uint8_t es)
 222{
 223    const uint64_t mask = get_element_lsbs_mask(es);
 224    uint64_t a0, a1, b0, b1, e0, e1, z0, z1;
 225    uint64_t first_zero = 16;
 226    uint64_t first_inequal;
 227    bool smaller = false;
 228
 229    a0 = s390_vec_read_element64(v2, 0);
 230    a1 = s390_vec_read_element64(v2, 1);
 231    b0 = s390_vec_read_element64(v3, 0);
 232    b1 = s390_vec_read_element64(v3, 1);
 233    e0 = nonzero_search(a0 ^ b0, mask);
 234    e1 = nonzero_search(a1 ^ b1, mask);
 235    first_inequal = match_index(e0, e1);
 236
 237    /* identify the smaller element */
 238    if (first_inequal < 16) {
 239        uint8_t enr = first_inequal / (1 << es);
 240        uint32_t a = s390_vec_read_element(v2, enr, es);
 241        uint32_t b = s390_vec_read_element(v3, enr, es);
 242
 243        smaller = a < b;
 244    }
 245
 246    if (zs) {
 247        z0 = zero_search(a0, mask);
 248        z1 = zero_search(a1, mask);
 249        first_zero = match_index(z0, z1);
 250    }
 251
 252    s390_vec_write_element64(v1, 0, MIN(first_inequal, first_zero));
 253    s390_vec_write_element64(v1, 1, 0);
 254    if (first_zero == 16 && first_inequal == 16) {
 255        return 3;
 256    } else if (first_zero < first_inequal) {
 257        return 0;
 258    }
 259    return smaller ? 1 : 2;
 260}
 261
 262#define DEF_VFENE_HELPER(BITS)                                                 \
 263void HELPER(gvec_vfene##BITS)(void *v1, const void *v2, const void *v3,        \
 264                              uint32_t desc)                                   \
 265{                                                                              \
 266    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 267                                                                               \
 268    vfene(v1, v2, v3, zs, MO_##BITS);                                          \
 269}
 270DEF_VFENE_HELPER(8)
 271DEF_VFENE_HELPER(16)
 272DEF_VFENE_HELPER(32)
 273
 274#define DEF_VFENE_CC_HELPER(BITS)                                              \
 275void HELPER(gvec_vfene_cc##BITS)(void *v1, const void *v2, const void *v3,     \
 276                                 CPUS390XState *env, uint32_t desc)            \
 277{                                                                              \
 278    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 279                                                                               \
 280    env->cc_op = vfene(v1, v2, v3, zs, MO_##BITS);                             \
 281}
 282DEF_VFENE_CC_HELPER(8)
 283DEF_VFENE_CC_HELPER(16)
 284DEF_VFENE_CC_HELPER(32)
 285
 286static int vistr(void *v1, const void *v2, uint8_t es)
 287{
 288    const uint64_t mask = get_element_lsbs_mask(es);
 289    uint64_t a0 = s390_vec_read_element64(v2, 0);
 290    uint64_t a1 = s390_vec_read_element64(v2, 1);
 291    uint64_t z;
 292    int cc = 3;
 293
 294    z = zero_search(a0, mask);
 295    if (z) {
 296        a0 &= ~(-1ull >> clz64(z));
 297        a1 = 0;
 298        cc = 0;
 299    } else {
 300        z = zero_search(a1, mask);
 301        if (z) {
 302            a1 &= ~(-1ull >> clz64(z));
 303            cc = 0;
 304        }
 305    }
 306
 307    s390_vec_write_element64(v1, 0, a0);
 308    s390_vec_write_element64(v1, 1, a1);
 309    return cc;
 310}
 311
 312#define DEF_VISTR_HELPER(BITS)                                                 \
 313void HELPER(gvec_vistr##BITS)(void *v1, const void *v2, uint32_t desc)         \
 314{                                                                              \
 315    vistr(v1, v2, MO_##BITS);                                                  \
 316}
 317DEF_VISTR_HELPER(8)
 318DEF_VISTR_HELPER(16)
 319DEF_VISTR_HELPER(32)
 320
 321#define DEF_VISTR_CC_HELPER(BITS)                                              \
 322void HELPER(gvec_vistr_cc##BITS)(void *v1, const void *v2, CPUS390XState *env, \
 323                                uint32_t desc)                                 \
 324{                                                                              \
 325    env->cc_op = vistr(v1, v2, MO_##BITS);                                     \
 326}
 327DEF_VISTR_CC_HELPER(8)
 328DEF_VISTR_CC_HELPER(16)
 329DEF_VISTR_CC_HELPER(32)
 330
 331static bool element_compare(uint32_t data, uint32_t l, uint8_t c)
 332{
 333    const bool equal = extract32(c, 7, 1);
 334    const bool lower = extract32(c, 6, 1);
 335    const bool higher = extract32(c, 5, 1);
 336
 337    if (data < l) {
 338        return lower;
 339    } else if (data > l) {
 340        return higher;
 341    }
 342    return equal;
 343}
 344
 345static int vstrc(void *v1, const void *v2, const void *v3, const void *v4,
 346                 bool in, bool rt, bool zs, uint8_t es)
 347{
 348    const uint64_t mask = get_element_lsbs_mask(es);
 349    uint64_t a0 = s390_vec_read_element64(v2, 0);
 350    uint64_t a1 = s390_vec_read_element64(v2, 1);
 351    int first_zero = 16, first_match = 16;
 352    S390Vector rt_result = {};
 353    uint64_t z0, z1;
 354    int i, j;
 355
 356    if (zs) {
 357        z0 = zero_search(a0, mask);
 358        z1 = zero_search(a1, mask);
 359        first_zero = match_index(z0, z1);
 360    }
 361
 362    for (i = 0; i < 16 / (1 << es); i++) {
 363        const uint32_t data = s390_vec_read_element(v2, i, es);
 364        const int cur_byte = i * (1 << es);
 365        bool any_match = false;
 366
 367        /* if we don't need a bit vector, we can stop early */
 368        if (cur_byte == first_zero && !rt) {
 369            break;
 370        }
 371
 372        for (j = 0; j < 16 / (1 << es); j += 2) {
 373            const uint32_t l1 = s390_vec_read_element(v3, j, es);
 374            const uint32_t l2 = s390_vec_read_element(v3, j + 1, es);
 375            /* we are only interested in the highest byte of each element */
 376            const uint8_t c1 = s390_vec_read_element8(v4, j * (1 << es));
 377            const uint8_t c2 = s390_vec_read_element8(v4, (j + 1) * (1 << es));
 378
 379            if (element_compare(data, l1, c1) &&
 380                element_compare(data, l2, c2)) {
 381                any_match = true;
 382                break;
 383            }
 384        }
 385        /* invert the result if requested */
 386        any_match = in ^ any_match;
 387
 388        if (any_match) {
 389            /* indicate bit vector if requested */
 390            if (rt) {
 391                const uint64_t val = -1ull;
 392
 393                first_match = MIN(cur_byte, first_match);
 394                s390_vec_write_element(&rt_result, i, es, val);
 395            } else {
 396                /* stop on the first match */
 397                first_match = cur_byte;
 398                break;
 399            }
 400        }
 401    }
 402
 403    if (rt) {
 404        *(S390Vector *)v1 = rt_result;
 405    } else {
 406        s390_vec_write_element64(v1, 0, MIN(first_match, first_zero));
 407        s390_vec_write_element64(v1, 1, 0);
 408    }
 409
 410    if (first_zero == 16 && first_match == 16) {
 411        return 3; /* no match */
 412    } else if (first_zero == 16) {
 413        return 1; /* matching elements, no match for zero */
 414    } else if (first_match < first_zero) {
 415        return 2; /* matching elements before match for zero */
 416    }
 417    return 0; /* match for zero */
 418}
 419
 420#define DEF_VSTRC_HELPER(BITS)                                                 \
 421void HELPER(gvec_vstrc##BITS)(void *v1, const void *v2, const void *v3,        \
 422                              const void *v4, uint32_t desc)                   \
 423{                                                                              \
 424    const bool in = extract32(simd_data(desc), 3, 1);                          \
 425    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 426                                                                               \
 427    vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                               \
 428}
 429DEF_VSTRC_HELPER(8)
 430DEF_VSTRC_HELPER(16)
 431DEF_VSTRC_HELPER(32)
 432
 433#define DEF_VSTRC_RT_HELPER(BITS)                                              \
 434void HELPER(gvec_vstrc_rt##BITS)(void *v1, const void *v2, const void *v3,     \
 435                                 const void *v4, uint32_t desc)                \
 436{                                                                              \
 437    const bool in = extract32(simd_data(desc), 3, 1);                          \
 438    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 439                                                                               \
 440    vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                               \
 441}
 442DEF_VSTRC_RT_HELPER(8)
 443DEF_VSTRC_RT_HELPER(16)
 444DEF_VSTRC_RT_HELPER(32)
 445
 446#define DEF_VSTRC_CC_HELPER(BITS)                                              \
 447void HELPER(gvec_vstrc_cc##BITS)(void *v1, const void *v2, const void *v3,     \
 448                                 const void *v4, CPUS390XState *env,           \
 449                                 uint32_t desc)                                \
 450{                                                                              \
 451    const bool in = extract32(simd_data(desc), 3, 1);                          \
 452    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 453                                                                               \
 454    env->cc_op = vstrc(v1, v2, v3, v4, in, 0, zs, MO_##BITS);                  \
 455}
 456DEF_VSTRC_CC_HELPER(8)
 457DEF_VSTRC_CC_HELPER(16)
 458DEF_VSTRC_CC_HELPER(32)
 459
 460#define DEF_VSTRC_CC_RT_HELPER(BITS)                                           \
 461void HELPER(gvec_vstrc_cc_rt##BITS)(void *v1, const void *v2, const void *v3,  \
 462                                    const void *v4, CPUS390XState *env,        \
 463                                    uint32_t desc)                             \
 464{                                                                              \
 465    const bool in = extract32(simd_data(desc), 3, 1);                          \
 466    const bool zs = extract32(simd_data(desc), 1, 1);                          \
 467                                                                               \
 468    env->cc_op = vstrc(v1, v2, v3, v4, in, 1, zs, MO_##BITS);                  \
 469}
 470DEF_VSTRC_CC_RT_HELPER(8)
 471DEF_VSTRC_CC_RT_HELPER(16)
 472DEF_VSTRC_CC_RT_HELPER(32)
 473
 474static int vstrs(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
 475                 const S390Vector *v4, uint8_t es, bool zs)
 476{
 477    int substr_elen, substr_0, str_elen, i, j, k, cc;
 478    int nelem = 16 >> es;
 479    bool eos = false;
 480
 481    substr_elen = s390_vec_read_element8(v4, 7) >> es;
 482
 483    /* If ZS, bound substr length by min(nelem, strlen(v3)). */
 484    if (zs) {
 485        substr_elen = MIN(substr_elen, nelem);
 486        for (i = 0; i < substr_elen; i++) {
 487            if (s390_vec_read_element(v3, i, es) == 0) {
 488                substr_elen = i;
 489                break;
 490            }
 491        }
 492    }
 493
 494    if (substr_elen == 0) {
 495        cc = 2; /* full match for degenerate case of empty substr */
 496        k = 0;
 497        goto done;
 498    }
 499
 500    /* If ZS, look for eos in the searched string. */
 501    if (zs) {
 502        for (k = 0; k < nelem; k++) {
 503            if (s390_vec_read_element(v2, k, es) == 0) {
 504                eos = true;
 505                break;
 506            }
 507        }
 508        str_elen = k;
 509    } else {
 510        str_elen = nelem;
 511    }
 512
 513    substr_0 = s390_vec_read_element(v3, 0, es);
 514
 515    for (k = 0; ; k++) {
 516        for (; k < str_elen; k++) {
 517            if (s390_vec_read_element(v2, k, es) == substr_0) {
 518                break;
 519            }
 520        }
 521
 522        /* If we reached the end of the string, no match. */
 523        if (k == str_elen) {
 524            cc = eos; /* no match (with or without zero char) */
 525            goto done;
 526        }
 527
 528        /* If the substring is only one char, match. */
 529        if (substr_elen == 1) {
 530            cc = 2; /* full match */
 531            goto done;
 532        }
 533
 534        /* If the match begins at the last char, we have a partial match. */
 535        if (k == str_elen - 1) {
 536            cc = 3; /* partial match */
 537            goto done;
 538        }
 539
 540        i = MIN(nelem, k + substr_elen);
 541        for (j = k + 1; j < i; j++) {
 542            uint32_t e2 = s390_vec_read_element(v2, j, es);
 543            uint32_t e3 = s390_vec_read_element(v3, j - k, es);
 544            if (e2 != e3) {
 545                break;
 546            }
 547        }
 548        if (j == i) {
 549            /* Matched up until "end". */
 550            cc = i - k == substr_elen ? 2 : 3; /* full or partial match */
 551            goto done;
 552        }
 553    }
 554
 555 done:
 556    s390_vec_write_element64(v1, 0, k << es);
 557    s390_vec_write_element64(v1, 1, 0);
 558    return cc;
 559}
 560
 561#define DEF_VSTRS_HELPER(BITS)                                             \
 562void QEMU_FLATTEN HELPER(gvec_vstrs_##BITS)(void *v1, const void *v2,      \
 563    const void *v3, const void *v4, CPUS390XState *env, uint32_t desc)     \
 564    { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, false); }              \
 565void QEMU_FLATTEN HELPER(gvec_vstrs_zs##BITS)(void *v1, const void *v2,    \
 566    const void *v3, const void *v4, CPUS390XState *env, uint32_t desc)     \
 567    { env->cc_op = vstrs(v1, v2, v3, v4, MO_##BITS, true); }
 568
 569DEF_VSTRS_HELPER(8)
 570DEF_VSTRS_HELPER(16)
 571DEF_VSTRS_HELPER(32)
 572