qemu/target/s390x/tcg/mem_helper.c
/*
 *  S/390 memory access helper routines
 *
 *  Copyright (c) 2009 Ulrich Hecht
 *  Copyright (c) 2009 Alexander Graf
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "s390x-internal.h"
#include "tcg_s390x.h"
#include "exec/helper-proto.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "trace.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/s390x/storage-keys.h"
#include "hw/boards.h"
#endif

/*****************************************************************************/
/* Softmmu support */

/* #define DEBUG_HELPER */
#ifdef DEBUG_HELPER
#define HELPER_LOG(x...) qemu_log(x)
#else
#define HELPER_LOG(x...)
#endif

static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
{
    uint16_t pkm = env->cregs[3] >> 16;

    if (env->psw.mask & PSW_MASK_PSTATE) {
        /* The PSW key has range 0..15; it is valid if its bit is set in the PKM. */
        return pkm & (0x80 >> psw_key);
    }
    return true;
}

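/*
 * A copy is destructive if a naive left-to-right, byte-by-byte pass would
 * read source bytes that were already overwritten, i.e. if dest starts
 * within (src, src + len - 1], taking address wraparound into account.
 */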
static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
                                   uint64_t src, uint32_t len)
{
    if (!len || src == dest) {
        return false;
    }
    /* Take care of wrapping at the end of address space. */
    if (unlikely(wrap_address(env, src + len - 1) < src)) {
        return dest > src || dest <= wrap_address(env, src + len - 1);
    }
    return dest > src && dest <= src + len - 1;
}

/* Trigger a SPECIFICATION exception if an address or a length is not
   naturally aligned.  */
static inline void check_alignment(CPUS390XState *env, uint64_t v,
                                   int wordsize, uintptr_t ra)
{
    if (v % wordsize) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }
}

/* Load a value from memory according to its size.  */
static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
                                           int wordsize, uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        return cpu_ldub_data_ra(env, addr, ra);
    case 2:
        return cpu_lduw_data_ra(env, addr, ra);
    default:
        abort();
    }
}

/* Store a value to memory according to its size.  */
static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
                                      uint64_t value, int wordsize,
                                      uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        cpu_stb_data_ra(env, addr, value, ra);
        break;
    case 2:
        cpu_stw_data_ra(env, addr, value, ra);
        break;
    default:
        abort();
    }
}

/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;
    target_ulong vaddr2;
    char *haddr1;
    char *haddr2;
    uint16_t size1;
    uint16_t size2;
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers. Maybe one day we can get rid of ld/st access - once we can
     * handle TLB_NOTDIRTY differently. We don't expect these special accesses
     * to trigger exceptions - at most, TLB_NOTDIRTY on LAP pages could
     * trigger a new MMU translation, and it is very unlikely that the
     * mapping changes in between such that we would fault.
     */
    int mmu_idx;
} S390Access;

/*
 * With nonfault=1, return the PGM_ exception that would have been injected
 * into the guest; return 0 if no exception was detected.
 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 */
static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
                             MMUAccessType access_type, int mmu_idx,
                             bool nonfault, void **phost, uintptr_t ra)
{
#if defined(CONFIG_USER_ONLY)
    return probe_access_flags(env, addr, access_type, mmu_idx,
                              nonfault, phost, ra);
#else
    int flags;

    /*
     * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
     * to detect if there was an exception during tlb_fill().
     */
    env->tlb_fill_exc = 0;
    flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
                               ra);
    if (env->tlb_fill_exc) {
        return env->tlb_fill_exc;
    }

    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
    return 0;
#endif
}

static int access_prepare_nf(S390Access *access, CPUS390XState *env,
                             bool nonfault, vaddr vaddr1, int size,
                             MMUAccessType access_type,
                             int mmu_idx, uintptr_t ra)
{
    void *haddr1, *haddr2 = NULL;
    int size1, size2, exc;
    vaddr vaddr2 = 0;

    assert(size > 0 && size <= 4096);

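    /* -(vaddr1 | TARGET_PAGE_MASK) is the byte count to the end of the page. */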
    size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK));
    size2 = size - size1;

    exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
                            &haddr1, ra);
    if (exc) {
        return exc;
    }
    if (unlikely(size2)) {
        /* The access crosses page boundaries. */
        vaddr2 = wrap_address(env, vaddr1 + size1);
        exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
                                nonfault, &haddr2, ra);
        if (exc) {
            return exc;
        }
    }

    *access = (S390Access) {
        .vaddr1 = vaddr1,
        .vaddr2 = vaddr2,
        .haddr1 = haddr1,
        .haddr2 = haddr2,
        .size1 = size1,
        .size2 = size2,
        .mmu_idx = mmu_idx
    };
    return 0;
}

static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
                                 MMUAccessType access_type, int mmu_idx,
                                 uintptr_t ra)
{
    S390Access ret;
    int exc = access_prepare_nf(&ret, env, false, vaddr, size,
                                access_type, mmu_idx, ra);
    assert(!exc);
    return ret;
}

/* Helper to handle memset on a single page. */
static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
                             uint8_t byte, uint16_t size, int mmu_idx,
                             uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    g_assert(haddr);
    memset(haddr, byte, size);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    int i;

    if (likely(haddr)) {
        memset(haddr, byte, size);
    } else {
        /*
         * Do a single access and test if we can then get access to the
         * page. This is especially relevant to speed up TLB_NOTDIRTY.
         */
        g_assert(size > 0);
        cpu_stb_mmu(env, vaddr, byte, oi, ra);
        haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
        if (likely(haddr)) {
            memset(haddr + 1, byte, size - 1);
        } else {
            for (i = 1; i < size; i++) {
                cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
            }
        }
    }
#endif
}

static void access_memset(CPUS390XState *env, S390Access *desta,
                          uint8_t byte, uintptr_t ra)
{
    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
                     desta->mmu_idx, ra);
    if (likely(!desta->size2)) {
        return;
    }
    do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
                     desta->mmu_idx, ra);
}

static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                                  int offset, int mmu_idx, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return ldub_p(*haddr + offset);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    uint8_t byte;

    if (likely(*haddr)) {
        return ldub_p(*haddr + offset);
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    byte = cpu_ldb_mmu(env, vaddr + offset, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
    return byte;
#endif
}

static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
                               int offset, uintptr_t ra)
{
    if (offset < access->size1) {
        return do_access_get_byte(env, access->vaddr1, &access->haddr1,
                                  offset, access->mmu_idx, ra);
    }
    return do_access_get_byte(env, access->vaddr2, &access->haddr2,
                              offset - access->size1, access->mmu_idx, ra);
}

static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                               int offset, uint8_t byte, int mmu_idx,
                               uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    stb_p(*haddr + offset, byte);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

    if (likely(*haddr)) {
        stb_p(*haddr + offset, byte);
        return;
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
#endif
}

static void access_set_byte(CPUS390XState *env, S390Access *access,
                            int offset, uint8_t byte, uintptr_t ra)
{
    if (offset < access->size1) {
        do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
                           access->mmu_idx, ra);
    } else {
        do_access_set_byte(env, access->vaddr2, &access->haddr2,
                           offset - access->size1, byte, access->mmu_idx, ra);
    }
}

/*
 * Move data with the same semantics as memmove() in case ranges don't overlap
 * or src > dest. Undefined behavior on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int diff;

    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);

    /* Fall back to slow access if we lack direct access to any host page. */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        for (i = 0; i < desta->size1 + desta->size2; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

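    /*
     * All host addresses are known. Copy fragment by fragment, matching up
     * the (possibly different) page splits of source and destination.
     */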
    if (srca->size1 == desta->size1) {
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (srca->size1 < desta->size1) {
        diff = desta->size1 - srca->size1;
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        diff = srca->size1 - desta->size1;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}

static int mmu_idx_from_as(uint8_t as)
{
    switch (as) {
    case AS_PRIMARY:
        return MMU_PRIMARY_IDX;
    case AS_SECONDARY:
        return MMU_SECONDARY_IDX;
    case AS_HOME:
        return MMU_HOME_IDX;
    default:
        /* FIXME AS_ACCREG */
        g_assert_not_reached();
    }
}

/* and on array */
static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* NC always processes one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) &
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}

/* xor on array */
static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* XC always processes one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /* xor with itself is the same as memset(0) */
    if (src == dest) {
        access_memset(env, &desta, 0, ra);
        return 0;
    }

    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}

/* or on array */
static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* OC always processes one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) |
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}

/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
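    /* dest == src + 1 is the classic MVC memset idiom: byte 0 is propagated. */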
    if (dest == src + 1) {
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
    }

    return env->cc_op;
}

void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}

/* move inverse  */
void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVCIN always copies one more byte than specified - maximum is 256 */
    l++;

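    /* The operand spans [src - l + 1, src]; rebase src to its leftmost byte. */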
    src = wrap_address(env, src - l + 1);
    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* move numerics  */
void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVN always copies one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
                          (access_get_byte(env, &srca2, i, ra) & 0xf0);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

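    /*
     * The source digits are shifted one nibble left into the destination;
     * the rightmost destination nibble (typically a packed-decimal sign) is
     * preserved.
     */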
    /* Handle rightmost byte */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}

/* move zones  */
void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVZ always copies one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
                          (access_get_byte(env, &srca2, i, ra) & 0x0f);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* compare unsigned byte arrays */
static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
                              uint64_t s2, uintptr_t ra)
{
    uint32_t i;
    uint32_t cc = 0;

    HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
               __func__, l, s1, s2);

    for (i = 0; i <= l; i++) {
        uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
        uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
        HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
        if (x < y) {
            cc = 1;
            break;
        } else if (x > y) {
            cc = 2;
            break;
        }
    }

    HELPER_LOG("\n");
    return cc;
}

uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}

/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            addr++;
        }
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}

static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}

/*
 * Store the address to the given register, zeroing out unused leftmost
 * bits in bit positions 32-63 (24-bit and 31-bit mode only).
 */
static inline void set_address_zero(CPUS390XState *env, int reg,
                                    uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            address &= 0x00ffffff;
        } else {
            address &= 0x7fffffff;
        }
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
    }
}

static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}

static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        return (uint32_t)length;
    }
    return length;
}

static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        /* 24-Bit and 31-Bit mode */
        length &= 0x7fffffff;
    }
    return length;
}

static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}

static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = length;
    } else {
        /* 24-Bit and 31-Bit mode */
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
    }
}

/* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];

    /* Bits 32-55 of R0 must be zero.  */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

/* unsigned string compare (c is string terminator) */
uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                env->retxl = s2;
                return s1;
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            env->retxl = s2 + len;
            return s1 + len;
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    env->retxl = s2 + len;
    return s1 + len;
}

/* move page */
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
{
    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
    const int mmu_idx = cpu_mmu_index(env, false);
    const bool f = extract64(r0, 11, 1);
    const bool s = extract64(r0, 10, 1);
    const bool cco = extract64(r0, 8, 1);
    uintptr_t ra = GETPC();
    S390Access srca, desta;
    int exc;

    if ((f && s) || extract64(r0, 12, 4)) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    /*
     * We always manually handle exceptions such that we can properly store
     * r1/r2 to the lowcore on page-translation exceptions.
     *
     * TODO: Access key handling
     */
    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
                            MMU_DATA_LOAD, mmu_idx, ra);
    if (exc) {
        if (cco) {
            return 2;
        }
        goto inject_exc;
    }
    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
                            MMU_DATA_STORE, mmu_idx, ra);
    if (exc) {
        if (cco && exc != PGM_PROTECTION) {
            return 1;
        }
        goto inject_exc;
    }
    access_memmove(env, &desta, &srca, ra);
    return 0; /* data moved */
inject_exc:
#if !defined(CONFIG_USER_ONLY)
    if (exc != PGM_ADDRESSING) {
        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
                 env->tlb_fill_tec);
    }
    if (exc == PGM_PAGE_TRANS) {
        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
                 r1 << 4 | r2);
    }
#endif
    tcg_s390_program_interrupt(env, exc, ra);
}

/* string copy */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    const uint8_t c = env->regs[0];
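    /* Process at most up to the next page boundary of either operand. */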
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}

/* load access registers r1 to r3 from memory at a2 */
void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

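    /* The register range wraps around from 15 back to 0 until r3 is reached. */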
    for (i = r1;; i = (i + 1) % 16) {
        env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* store access registers r1 to r3 in memory at a2 */
void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        cpu_stl_data_ra(env, a2, env->aregs[i], ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* move long helper */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        *dest = wrap_address(env, *dest + len);
    }

    return *destlen ? 3 : cc;
}

/* move long */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
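    /*
     * The 24-bit operand lengths live in the low bits of R1 + 1 and R2 + 1;
     * the pad byte sits directly above the source length in R2 + 1.
     */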
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memset(env, &desta, pad, ra);
        } else {
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
                                  ra);
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}

/* move long extended */
uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* move long unicode */
uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* compare logical long helper */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
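    /* The shorter operand is logically extended with the pad character. */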
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}

/* compare logical long */
uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
    uint64_t src3 = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);

    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
    set_address(env, r1, src1);
    set_address(env, r2, src3);

    return cc;
}

/* compare logical long extended memcompare insn with padding */
uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* compare logical long unicode memcompare insn with padding */
uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* checksum */
uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                      uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    env->retxl = cksm;
    return len;
}

void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    dest += len_dest;
    src += len_src;

    /* The last byte is special: it only swaps the two nibbles. */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* Now pack the low nibbles of two source bytes into each destination byte. */
    while (len_dest > 0) {
        b = 0;

        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}

static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}

void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 1, GETPC());
}

void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 2, GETPC());
}

void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    int second_nibble = 0;

    dest += len_dest;
    src += len_src;

    /* The last byte is special: it only swaps the two nibbles. */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* Now expand each remaining digit into a byte with zone bits 0xf0. */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}

static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}

uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 1, src, GETPC());
}

uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 2, src, GETPC());
}

uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;
    int i;

    for (i = 0; i < destlen; i++) {
        uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
        /* digit */
        cc |= (b & 0xf0) > 0x90 ? 2 : 0;

        if (i == (destlen - 1)) {
            /* sign */
            cc |= (b & 0xf) < 0xa ? 1 : 0;
        } else {
            /* digit */
            cc |= (b & 0xf) > 0x9 ? 2 : 0;
        }
    }

    return cc;
}

static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
                             uint64_t trans, uintptr_t ra)
{
    uint32_t i;

    for (i = 0; i <= len; i++) {
        uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
        uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    return env->cc_op;
}

void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
                uint64_t trans)
{
    do_helper_tr(env, len, array, trans, GETPC());
}

uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
                     uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    env->retxl = len - i;
    return array + i;
}

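/*
 * TRT/TRTR core: scan len + 1 argument bytes through the function-code table.
 * The first nonzero function byte stops the scan: its address goes to R1 and
 * the function byte to R2.  cc 0 - no nonzero byte found; cc 1 - found before
 * the last byte; cc 2 - found at the last byte.
 */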
1662static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1663                                     uint64_t array, uint64_t trans,
1664                                     int inc, uintptr_t ra)
1665{
1666    int i;
1667
1668    for (i = 0; i <= len; i++) {
1669        uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1670        uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1671
1672        if (sbyte != 0) {
1673            set_address(env, 1, array + i * inc);
1674            env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1675            return (i == len) ? 2 : 1;
1676        }
1677    }
1678
1679    return 0;
1680}
1681
1682static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1683                                  uint64_t array, uint64_t trans,
1684                                  uintptr_t ra)
1685{
1686    return do_helper_trt(env, len, array, trans, 1, ra);
1687}
1688
1689uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1690                     uint64_t trans)
1691{
1692    return do_helper_trt(env, len, array, trans, 1, GETPC());
1693}
1694
1695static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1696                                   uint64_t array, uint64_t trans,
1697                                   uintptr_t ra)
1698{
1699    return do_helper_trt(env, len, array, trans, -1, ra);
1700}
1701
1702uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1703                      uint64_t trans)
1704{
1705    return do_helper_trt(env, len, array, trans, -1, GETPC());
1706}
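
/*
 * Editorial note on the condition codes computed by do_helper_trt()
 * above: cc 0 means every function byte selected was zero; cc 1 means a
 * nonzero function byte was found before the last operand byte; cc 2
 * means it was found exactly at the last byte (i == len, LEN being the
 * length code).  On a hit, GR1 receives the operand address and the low
 * 8 bits of GR2 the function byte.
 */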
1707
1708/* Translate one/two to one/two */
1709uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1710                      uint32_t tst, uint32_t sizes)
1711{
1712    uintptr_t ra = GETPC();
1713    int dsize = (sizes & 1) ? 1 : 2;
1714    int ssize = (sizes & 2) ? 1 : 2;
1715    uint64_t tbl = get_address(env, 1);
1716    uint64_t dst = get_address(env, r1);
1717    uint64_t len = get_length(env, r1 + 1);
1718    uint64_t src = get_address(env, r2);
1719    uint32_t cc = 3;
1720    int i;
1721
1722    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1723       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1724       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1725    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1726        tbl &= -4096;
1727    } else {
1728        tbl &= -8;
1729    }
1730
1731    check_alignment(env, len, ssize, ra);
1732
1733    /* Lest we fail to service interrupts in a timely manner, limit the
1734       amount of work we're willing to do.  */
1735    for (i = 0; i < 0x2000; i++) {
1736        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1737        uint64_t tble = tbl + (sval * dsize);
1738        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1739        if (dval == tst) {
1740            cc = 1;
1741            break;
1742        }
1743        cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1744
1745        len -= ssize;
1746        src += ssize;
1747        dst += dsize;
1748
1749        if (len == 0) {
1750            cc = 0;
1751            break;
1752        }
1753    }
1754
1755    set_address(env, r1, dst);
1756    set_length(env, r1 + 1, len);
1757    set_address(env, r2, src);
1758
1759    return cc;
1760}
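
/*
 * For reference (editorial note): given the decode above -- bit 0 of
 * SIZES clear selects a two-byte destination, bit 1 clear a two-byte
 * source -- the four translate variants map as:
 *
 *     insn    sizes   ssize   dsize
 *     TRTT      0       2       2
 *     TRTO      1       2       1
 *     TROT      2       1       2
 *     TROO      3       1       1
 */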
1761
1762void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
1763                  uint32_t r1, uint32_t r3)
1764{
1765    uintptr_t ra = GETPC();
1766    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1767    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1768    Int128 oldv;
1769    uint64_t oldh, oldl;
1770    bool fail;
1771
1772    check_alignment(env, addr, 16, ra);
1773
1774    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
1775    oldl = cpu_ldq_data_ra(env, addr + 8, ra);
1776
1777    oldv = int128_make128(oldl, oldh);
1778    fail = !int128_eq(oldv, cmpv);
1779    if (fail) {
1780        newv = oldv;
1781    }
1782
1783    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
1784    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
1785
1786    env->cc_op = fail;
1787    env->regs[r1] = int128_gethi(oldv);
1788    env->regs[r1 + 1] = int128_getlo(oldv);
1789}
1790
1791void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
1792                           uint32_t r1, uint32_t r3)
1793{
1794    uintptr_t ra = GETPC();
1795    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1796    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1797    int mem_idx;
1798    MemOpIdx oi;
1799    Int128 oldv;
1800    bool fail;
1801
1802    assert(HAVE_CMPXCHG128);
1803
1804    mem_idx = cpu_mmu_index(env, false);
1805    oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1806    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
1807    fail = !int128_eq(oldv, cmpv);
1808
1809    env->cc_op = fail;
1810    env->regs[r1] = int128_gethi(oldv);
1811    env->regs[r1 + 1] = int128_getlo(oldv);
1812}
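
/*
 * Minimal hypothetical sketch (editorial, not part of the original
 * helper set): how the CDSG helpers above view an even/odd guest
 * register pair as an Int128.  regs[r] supplies the high 64 bits,
 * regs[r + 1] the low, and int128_make128() takes (lo, hi).
 */
static inline Int128 cdsg_regpair_sketch(const uint64_t *regs, int r)
{
    return int128_make128(regs[r + 1], regs[r]);
}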
1813
1814static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1815                        uint64_t a2, bool parallel)
1816{
1817    uint32_t mem_idx = cpu_mmu_index(env, false);
1818    uintptr_t ra = GETPC();
1819    uint32_t fc = extract32(env->regs[0], 0, 8);
1820    uint32_t sc = extract32(env->regs[0], 8, 8);
1821    uint64_t pl = get_address(env, 1) & -16;
1822    uint64_t svh, svl;
1823    uint32_t cc;
1824
1825    /* Sanity check the function code and storage characteristic.  */
1826    if (fc > 1 || sc > 3) {
1827        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1828            goto spec_exception;
1829        }
1830        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1831            goto spec_exception;
1832        }
1833    }
1834
1835    /* Sanity check the alignments.  */
1836    if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1837        goto spec_exception;
1838    }
1839
1840    /* Sanity check writability of the store address.  */
1841    probe_write(env, a2, 1 << sc, mem_idx, ra);
1842
1843    /*
1844     * Note that the compare-and-swap is atomic, and the store is atomic,
1845     * but the complete operation is not.  Therefore we do not need to
1846     * assert serial context in order to implement this.  That said,
1847     * restart early if we can't support either operation that is supposed
1848     * to be atomic.
1849     */
1850    if (parallel) {
1851        uint32_t max = 2;
1852#ifdef CONFIG_ATOMIC64
1853        max = 3;
1854#endif
1855        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1856            (HAVE_ATOMIC128  ? 0 : sc > max)) {
1857            cpu_loop_exit_atomic(env_cpu(env), ra);
1858        }
1859    }
1860
1861    /* All loads happen before all stores.  For simplicity, load the entire
1862       store value area from the parameter list.  */
1863    svh = cpu_ldq_data_ra(env, pl + 16, ra);
1864    svl = cpu_ldq_data_ra(env, pl + 24, ra);
1865
1866    switch (fc) {
1867    case 0:
1868        {
1869            uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1870            uint32_t cv = env->regs[r3];
1871            uint32_t ov;
1872
1873            if (parallel) {
1874#ifdef CONFIG_USER_ONLY
1875                uint32_t *haddr = g2h(env_cpu(env), a1);
1876                ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1877#else
1878                MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1879                ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1880#endif
1881            } else {
1882                ov = cpu_ldl_data_ra(env, a1, ra);
1883                cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1884            }
1885            cc = (ov != cv);
1886            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1887        }
1888        break;
1889
1890    case 1:
1891        {
1892            uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1893            uint64_t cv = env->regs[r3];
1894            uint64_t ov;
1895
1896            if (parallel) {
1897#ifdef CONFIG_ATOMIC64
1898                MemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
1899                ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1900#else
1901                /* Note that we asserted !parallel above.  */
1902                g_assert_not_reached();
1903#endif
1904            } else {
1905                ov = cpu_ldq_data_ra(env, a1, ra);
1906                cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1907            }
1908            cc = (ov != cv);
1909            env->regs[r3] = ov;
1910        }
1911        break;
1912
1913    case 2:
1914        {
1915            uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1916            uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1917            Int128 nv = int128_make128(nvl, nvh);
1918            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1919            Int128 ov;
1920
1921            if (!parallel) {
1922                uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1923                uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1924
1925                ov = int128_make128(ol, oh);
1926                cc = !int128_eq(ov, cv);
1927                if (cc) {
1928                    nv = ov;
1929                }
1930
1931                cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1932                cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1933            } else if (HAVE_CMPXCHG128) {
1934                MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1935                ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1936                cc = !int128_eq(ov, cv);
1937            } else {
1938                /* Note that we asserted !parallel above.  */
1939                g_assert_not_reached();
1940            }
1941
1942            env->regs[r3 + 0] = int128_gethi(ov);
1943            env->regs[r3 + 1] = int128_getlo(ov);
1944        }
1945        break;
1946
1947    default:
1948        g_assert_not_reached();
1949    }
1950
1951    /* Store only if the comparison succeeded.  Note that above we use a pair
1952       of 64-bit big-endian loads, so for sc < 3 we must extract the value
1953       from the most-significant bits of svh.  */
1954    if (cc == 0) {
1955        switch (sc) {
1956        case 0:
1957            cpu_stb_data_ra(env, a2, svh >> 56, ra);
1958            break;
1959        case 1:
1960            cpu_stw_data_ra(env, a2, svh >> 48, ra);
1961            break;
1962        case 2:
1963            cpu_stl_data_ra(env, a2, svh >> 32, ra);
1964            break;
1965        case 3:
1966            cpu_stq_data_ra(env, a2, svh, ra);
1967            break;
1968        case 4:
1969            if (!parallel) {
1970                cpu_stq_data_ra(env, a2 + 0, svh, ra);
1971                cpu_stq_data_ra(env, a2 + 8, svl, ra);
1972            } else if (HAVE_ATOMIC128) {
1973                MemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1974                Int128 sv = int128_make128(svl, svh);
1975                cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1976            } else {
1977                /* Note that we asserted !parallel above.  */
1978                g_assert_not_reached();
1979            }
1980            break;
1981        default:
1982            g_assert_not_reached();
1983        }
1984    }
1985
1986    return cc;
1987
1988 spec_exception:
1989    tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1990}
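
/*
 * Editorial summary of the parameter-list layout consumed by do_csst()
 * above (PL is the 16-byte-aligned address taken from general register 1):
 *
 *     PL + 0  : replacement value for the compare-and-swap, fc-sized
 *               (split across PL+0/PL+8 when fc == 2)
 *     PL + 16 : store value, most-significant 8 bytes (svh)
 *     PL + 24 : store value, least-significant 8 bytes (svl)
 *
 * For sc < 3 the store value is left-aligned within svh, hence the
 * svh >> 56/48/32 extractions in the store switch.
 */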
1991
1992uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1993{
1994    return do_csst(env, r3, a1, a2, false);
1995}
1996
1997uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1998                               uint64_t a2)
1999{
2000    return do_csst(env, r3, a1, a2, true);
2001}
2002
2003#if !defined(CONFIG_USER_ONLY)
2004void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2005{
2006    uintptr_t ra = GETPC();
2007    bool PERchanged = false;
2008    uint64_t src = a2;
2009    uint32_t i;
2010
2011    if (src & 0x7) {
2012        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2013    }
2014
2015    for (i = r1;; i = (i + 1) % 16) {
2016        uint64_t val = cpu_ldq_data_ra(env, src, ra);
2017        if (env->cregs[i] != val && i >= 9 && i <= 11) {
2018            PERchanged = true;
2019        }
2020        env->cregs[i] = val;
2021        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
2022                   i, src, val);
2023        src += sizeof(uint64_t);
2024
2025        if (i == r3) {
2026            break;
2027        }
2028    }
2029
2030    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2031        s390_cpu_recompute_watchpoints(env_cpu(env));
2032    }
2033
2034    tlb_flush(env_cpu(env));
2035}
2036
2037void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2038{
2039    uintptr_t ra = GETPC();
2040    bool PERchanged = false;
2041    uint64_t src = a2;
2042    uint32_t i;
2043
2044    if (src & 0x3) {
2045        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2046    }
2047
2048    for (i = r1;; i = (i + 1) % 16) {
2049        uint32_t val = cpu_ldl_data_ra(env, src, ra);
2050        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
2051            PERchanged = true;
2052        }
2053        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
2054        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
2055        src += sizeof(uint32_t);
2056
2057        if (i == r3) {
2058            break;
2059        }
2060    }
2061
2062    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2063        s390_cpu_recompute_watchpoints(env_cpu(env));
2064    }
2065
2066    tlb_flush(env_cpu(env));
2067}
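
/*
 * Editorial worked example for the wrap-around loops above: LCTLG with
 * r1 = 13 and r3 = 2 iterates i = 13, 14, 15, 0, 1, 2, loading six
 * control registers from successive doublewords; "(i + 1) % 16"
 * implements the architectural register-number wrap.
 */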
2068
2069void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2070{
2071    uintptr_t ra = GETPC();
2072    uint64_t dest = a2;
2073    uint32_t i;
2074
2075    if (dest & 0x7) {
2076        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2077    }
2078
2079    for (i = r1;; i = (i + 1) % 16) {
2080        cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2081        dest += sizeof(uint64_t);
2082
2083        if (i == r3) {
2084            break;
2085        }
2086    }
2087}
2088
2089void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2090{
2091    uintptr_t ra = GETPC();
2092    uint64_t dest = a2;
2093    uint32_t i;
2094
2095    if (dest & 0x3) {
2096        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2097    }
2098
2099    for (i = r1;; i = (i + 1) % 16) {
2100        cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2101        dest += sizeof(uint32_t);
2102
2103        if (i == r3) {
2104            break;
2105        }
2106    }
2107}
2108
2109uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2110{
2111    uintptr_t ra = GETPC();
2112    int i;
2113
2114    real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2115
2116    for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2117        cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2118    }
2119
2120    return 0;
2121}
2122
2123uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2124{
2125    S390CPU *cpu = env_archcpu(env);
2126    CPUState *cs = env_cpu(env);
2127
2128    /*
2129     * TODO: we currently don't handle all access protection types
2130     * (including access-list and key-controlled), nor AR mode.
2131     */
2132    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2133        /* Fetching permitted; storing permitted */
2134        return 0;
2135    }
2136
2137    if (env->int_pgm_code == PGM_PROTECTION) {
2138        /* retry if reading is possible */
2139        cs->exception_index = -1;
2140        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2141            /* Fetching permitted; storing not permitted */
2142            return 1;
2143        }
2144    }
2145
2146    switch (env->int_pgm_code) {
2147    case PGM_PROTECTION:
2148        /* Fetching not permitted; storing not permitted */
2149        cs->exception_index = -1;
2150        return 2;
2151    case PGM_ADDRESSING:
2152    case PGM_TRANS_SPEC:
2153        /* exceptions forwarded to the guest */
2154        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2155        return 0;
2156    }
2157
2158    /* Translation not available */
2159    cs->exception_index = -1;
2160    return 3;
2161}
2162
2163/* insert storage key extended */
2164uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2165{
2166    static S390SKeysState *ss;
2167    static S390SKeysClass *skeyclass;
2168    uint64_t addr = wrap_address(env, r2);
2169    uint8_t key;
2170    int rc;
2171
2172    addr = mmu_real2abs(env, addr);
2173    if (!mmu_absolute_addr_valid(addr, false)) {
2174        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2175    }
2176
2177    if (unlikely(!ss)) {
2178        ss = s390_get_skeys_device();
2179        skeyclass = S390_SKEYS_GET_CLASS(ss);
2180        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2181            tlb_flush_all_cpus_synced(env_cpu(env));
2182        }
2183    }
2184
2185    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2186    if (rc) {
2187        trace_get_skeys_nonzero(rc);
2188        return 0;
2189    }
2190    return key;
2191}
2192
2193/* set storage key extended */
2194void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2195{
2196    static S390SKeysState *ss;
2197    static S390SKeysClass *skeyclass;
2198    uint64_t addr = wrap_address(env, r2);
2199    uint8_t key;
2200    int rc;
2201
2202    addr = mmu_real2abs(env, addr);
2203    if (!mmu_absolute_addr_valid(addr, false)) {
2204        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2205    }
2206
2207    if (unlikely(!ss)) {
2208        ss = s390_get_skeys_device();
2209        skeyclass = S390_SKEYS_GET_CLASS(ss);
2210        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2211            tlb_flush_all_cpus_synced(env_cpu(env));
2212        }
2213    }
2214
2215    key = r1 & 0xfe;
2216    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2217    if (rc) {
2218        trace_set_skeys_nonzero(rc);
2219    }
2220    /*
2221     * Since we can only flush by virtual address, and not all the TLB
2222     * entries that map a given physical address, flush the whole TLB.
2223     */
2224    tlb_flush_all_cpus_synced(env_cpu(env));
2225}
2226
2227/* reset reference bit extended */
2228uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2229{
2230    uint64_t addr = wrap_address(env, r2);
2231    static S390SKeysState *ss;
2232    static S390SKeysClass *skeyclass;
2233    uint8_t re, key;
2234    int rc;
2235
2236    addr = mmu_real2abs(env, addr);
2237    if (!mmu_absolute_addr_valid(addr, false)) {
2238        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2239    }
2240
2241    if (unlikely(!ss)) {
2242        ss = s390_get_skeys_device();
2243        skeyclass = S390_SKEYS_GET_CLASS(ss);
2244        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2245            tlb_flush_all_cpus_synced(env_cpu(env));
2246        }
2247    }
2248
2249    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2250    if (rc) {
2251        trace_get_skeys_nonzero(rc);
2252        return 0;
2253    }
2254
2255    re = key & (SK_R | SK_C);
2256    key &= ~SK_R;
2257
2258    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2259    if (rc) {
2260        trace_set_skeys_nonzero(rc);
2261        return 0;
2262    }
2263    /*
2264     * Since we can only flush by virtual address, and not all the TLB
2265     * entries that map a given physical address, flush the whole TLB.
2266     */
2267    tlb_flush_all_cpus_synced(env_cpu(env));
2268
2269    /*
2270     * cc
2271     *
2272     * 0  Reference bit zero; change bit zero
2273     * 1  Reference bit zero; change bit one
2274     * 2  Reference bit one; change bit zero
2275     * 3  Reference bit one; change bit one
2276     */
2277
2278    return re >> 1;
2279}
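
/*
 * Editorial note on the "re >> 1" above: assuming the usual storage-key
 * bit values from hw/s390x/storage-keys.h (SK_R = 0x04, SK_C = 0x02),
 * re can only be 0, 2, 4 or 6, and the shift maps those directly onto
 * the cc values 0..3 tabulated in the comment.
 */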
2280
2281uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2282{
2283    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2284    S390Access srca, desta;
2285    uintptr_t ra = GETPC();
2286    int cc = 0;
2287
2288    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2289               __func__, l, a1, a2);
2290
2291    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2292        psw_as == AS_HOME || psw_as == AS_ACCREG) {
2293        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2294    }
2295
2296    l = wrap_length32(env, l);
2297    if (l > 256) {
2298        /* max 256 */
2299        l = 256;
2300        cc = 3;
2301    } else if (!l) {
2302        return cc;
2303    }
2304
2305    /* TODO: Access key handling */
2306    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2307    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2308    access_memmove(env, &desta, &srca, ra);
2309    return cc;
2310}
2311
2312uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2313{
2314    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2315    S390Access srca, desta;
2316    uintptr_t ra = GETPC();
2317    int cc = 0;
2318
2319    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2320               __func__, l, a1, a2);
2321
2322    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2323        psw_as == AS_HOME || psw_as == AS_ACCREG) {
2324        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2325    }
2326
2327    l = wrap_length32(env, l);
2328    if (l > 256) {
2329        /* max 256 */
2330        l = 256;
2331        cc = 3;
2332    } else if (!l) {
2333        return cc;
2334    }
2335
2336    /* TODO: Access key handling */
2337    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2338    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2339    access_memmove(env, &desta, &srca, ra);
2340    return cc;
2341}
2342
2343void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2344{
2345    CPUState *cs = env_cpu(env);
2346    const uintptr_t ra = GETPC();
2347    uint64_t table, entry, raddr;
2348    uint16_t entries, i, index = 0;
2349
2350    if (r2 & 0xff000) {
2351        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2352    }
2353
2354    if (!(r2 & 0x800)) {
2355        /* invalidation-and-clearing operation */
2356        table = r1 & ASCE_ORIGIN;
2357        entries = (r2 & 0x7ff) + 1;
2358
2359        switch (r1 & ASCE_TYPE_MASK) {
2360        case ASCE_TYPE_REGION1:
2361            index = (r2 >> 53) & 0x7ff;
2362            break;
2363        case ASCE_TYPE_REGION2:
2364            index = (r2 >> 42) & 0x7ff;
2365            break;
2366        case ASCE_TYPE_REGION3:
2367            index = (r2 >> 31) & 0x7ff;
2368            break;
2369        case ASCE_TYPE_SEGMENT:
2370            index = (r2 >> 20) & 0x7ff;
2371            break;
2372        }
2373        for (i = 0; i < entries; i++) {
2374            /* addresses are not wrapped in 24/31-bit mode, but the table index is */
2375            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2376            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2377            if (!(entry & REGION_ENTRY_I)) {
2378                /* we are allowed to not store if already invalid */
2379                entry |= REGION_ENTRY_I;
2380                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2381            }
2382        }
2383    }
2384
2385    /* We simply flush the complete TLB, so we can ignore the r3 field. */
2386    if (m4 & 1) {
2387        tlb_flush(cs);
2388    } else {
2389        tlb_flush_all_cpus_synced(cs);
2390    }
2391}
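
/*
 * Editorial aside on the shift amounts above: each DAT table level is
 * indexed by an 11-bit field of the virtual address carried in r2.
 * Counting from the least-significant bit, the region-first index sits
 * at bits 63-53, region-second at 52-42, region-third at 41-31 and the
 * segment index at 30-20, which is why the code shifts by 53/42/31/20
 * and masks with 0x7ff.
 */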
2392
2393/* invalidate pte */
2394void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2395                  uint32_t m4)
2396{
2397    CPUState *cs = env_cpu(env);
2398    const uintptr_t ra = GETPC();
2399    uint64_t page = vaddr & TARGET_PAGE_MASK;
2400    uint64_t pte_addr, pte;
2401
2402    /* Compute the page table entry address */
2403    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2404    pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2405
2406    /* Mark the page table entry as invalid */
2407    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2408    pte |= PAGE_ENTRY_I;
2409    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2410
2411    /* XXX we exploit the fact that Linux passes the exact virtual
2412       address here - it's not obliged to! */
2413    if (m4 & 1) {
2414        if (vaddr & ~VADDR_PAGE_TX_MASK) {
2415            tlb_flush_page(cs, page);
2416            /* XXX 31-bit hack */
2417            tlb_flush_page(cs, page ^ 0x80000000);
2418        } else {
2419            /* looks like we don't have a valid virtual address */
2420            tlb_flush(cs);
2421        }
2422    } else {
2423        if (vaddr & ~VADDR_PAGE_TX_MASK) {
2424            tlb_flush_page_all_cpus_synced(cs, page);
2425            /* XXX 31-bit hack */
2426            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2427        } else {
2428            /* looks like we don't have a valid virtual address */
2429            tlb_flush_all_cpus_synced(cs);
2430        }
2431    }
2432}
2433
2434/* flush local tlb */
2435void HELPER(ptlb)(CPUS390XState *env)
2436{
2437    tlb_flush(env_cpu(env));
2438}
2439
2440/* flush global tlb */
2441void HELPER(purge)(CPUS390XState *env)
2442{
2443    tlb_flush_all_cpus_synced(env_cpu(env));
2444}
2445
2446/* load real address */
2447uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2448{
2449    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2450    uint64_t ret, tec;
2451    int flags, exc, cc;
2452
2453    /* XXX incomplete - has more corner cases */
2454    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2455        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2456    }
2457
2458    exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2459    if (exc) {
2460        cc = 3;
2461        ret = exc | 0x80000000;
2462    } else {
2463        cc = 0;
2464        ret |= addr & ~TARGET_PAGE_MASK;
2465    }
2466
2467    env->cc_op = cc;
2468    return ret;
2469}
2470#endif
2471
2472/* load pair from quadword */
2473uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2474{
2475    uintptr_t ra = GETPC();
2476    uint64_t hi, lo;
2477
2478    check_alignment(env, addr, 16, ra);
2479    hi = cpu_ldq_data_ra(env, addr + 0, ra);
2480    lo = cpu_ldq_data_ra(env, addr + 8, ra);
2481
2482    env->retxl = lo;
2483    return hi;
2484}
2485
2486uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2487{
2488    uintptr_t ra = GETPC();
2489    uint64_t hi, lo;
2490    int mem_idx;
2491    MemOpIdx oi;
2492    Int128 v;
2493
2494    assert(HAVE_ATOMIC128);
2495
2496    mem_idx = cpu_mmu_index(env, false);
2497    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2498    v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2499    hi = int128_gethi(v);
2500    lo = int128_getlo(v);
2501
2502    env->retxl = lo;
2503    return hi;
2504}
2505
2506/* store pair to quadword */
2507void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2508                  uint64_t low, uint64_t high)
2509{
2510    uintptr_t ra = GETPC();
2511
2512    check_alignment(env, addr, 16, ra);
2513    cpu_stq_data_ra(env, addr + 0, high, ra);
2514    cpu_stq_data_ra(env, addr + 8, low, ra);
2515}
2516
2517void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2518                           uint64_t low, uint64_t high)
2519{
2520    uintptr_t ra = GETPC();
2521    int mem_idx;
2522    MemOpIdx oi;
2523    Int128 v;
2524
2525    assert(HAVE_ATOMIC128);
2526
2527    mem_idx = cpu_mmu_index(env, false);
2528    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2529    v = int128_make128(low, high);
2530    cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2531}
2532
2533/* Execute instruction.  This instruction executes an insn modified with
2534   the contents of r1.  It does not change the executed instruction in memory;
2535   it does not change the program counter.
2536
2537   Perform this by recording the modified instruction in env->ex_value.
2538   This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2539*/
2540void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2541{
2542    uint64_t insn = cpu_lduw_code(env, addr);
2543    uint8_t opc = insn >> 8;
2544
2545    /* Or in the contents of R1[56:63].  */
2546    insn |= r1 & 0xff;
2547
2548    /* Load the rest of the instruction.  */
2549    insn <<= 48;
2550    switch (get_ilen(opc)) {
2551    case 2:
2552        break;
2553    case 4:
2554        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2555        break;
2556    case 6:
2557        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2558        break;
2559    default:
2560        g_assert_not_reached();
2561    }
2562
2563    /* The most common cases can be sped up by avoiding a new TB.  */
2564    if ((opc & 0xf0) == 0xd0) {
2565        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2566                                      uint64_t, uintptr_t);
2567        static const dx_helper dx[16] = {
2568            [0x0] = do_helper_trt_bkwd,
2569            [0x2] = do_helper_mvc,
2570            [0x4] = do_helper_nc,
2571            [0x5] = do_helper_clc,
2572            [0x6] = do_helper_oc,
2573            [0x7] = do_helper_xc,
2574            [0xc] = do_helper_tr,
2575            [0xd] = do_helper_trt_fwd,
2576        };
2577        dx_helper helper = dx[opc & 0xf];
2578
2579        if (helper) {
2580            uint32_t l = extract64(insn, 48, 8);
2581            uint32_t b1 = extract64(insn, 44, 4);
2582            uint32_t d1 = extract64(insn, 32, 12);
2583            uint32_t b2 = extract64(insn, 28, 4);
2584            uint32_t d2 = extract64(insn, 16, 12);
2585            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2586            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2587
2588            env->cc_op = helper(env, l, a1, a2, 0);
2589            env->psw.addr += ilen;
2590            return;
2591        }
2592    } else if (opc == 0x0a) {
2593        env->int_svc_code = extract64(insn, 48, 8);
2594        env->int_svc_ilen = ilen;
2595        helper_exception(env, EXCP_SVC);
2596        g_assert_not_reached();
2597    }
2598
2599    /* Record the insn we want to execute as well as the ilen to use
2600       during the execution of the target insn.  This will also ensure
2601       that ex_value is non-zero, which flags that we are in a state
2602       that requires such execution.  */
2603    env->ex_value = insn | ilen;
2604}
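
/*
 * Editorial worked decode for the SS-format fast path above: after
 * "insn <<= 48" the 6-byte instruction image is left-aligned in the
 * 64-bit value, so for e.g. MVC (opcode 0xd2):
 *
 *     bits 63-56  opcode      extract64(insn, 56, 8)
 *     bits 55-48  length L    extract64(insn, 48, 8)
 *     bits 47-44  base B1     extract64(insn, 44, 4)
 *     bits 43-32  disp D1     extract64(insn, 32, 12)
 *     bits 31-28  base B2     extract64(insn, 28, 4)
 *     bits 27-16  disp D2     extract64(insn, 16, 12)
 */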
2605
2606uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2607                       uint64_t len)
2608{
2609    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2610    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2611    const uint64_t r0 = env->regs[0];
2612    const uintptr_t ra = GETPC();
2613    uint8_t dest_key, dest_as, dest_k, dest_a;
2614    uint8_t src_key, src_as, src_k, src_a;
2615    uint64_t val;
2616    int cc = 0;
2617
2618    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2619               __func__, dest, src, len);
2620
2621    if (!(env->psw.mask & PSW_MASK_DAT)) {
2622        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2623    }
2624
2625    /* OAC (operand access control) for the first operand -> dest */
2626    val = (r0 & 0xffff0000ULL) >> 16;
2627    dest_key = (val >> 12) & 0xf;
2628    dest_as = (val >> 6) & 0x3;
2629    dest_k = (val >> 1) & 0x1;
2630    dest_a = val & 0x1;
2631
2632    /* OAC (operand access control) for the second operand -> src */
2633    val = (r0 & 0x0000ffffULL);
2634    src_key = (val >> 12) & 0xf;
2635    src_as = (val >> 6) & 0x3;
2636    src_k = (val >> 1) & 0x1;
2637    src_a = val & 0x1;
2638
2639    if (!dest_k) {
2640        dest_key = psw_key;
2641    }
2642    if (!src_k) {
2643        src_key = psw_key;
2644    }
2645    if (!dest_a) {
2646        dest_as = psw_as;
2647    }
2648    if (!src_a) {
2649        src_as = psw_as;
2650    }
2651
2652    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2653        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2654    }
2655    if (!(env->cregs[0] & CR0_SECONDARY) &&
2656        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2657        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2658    }
2659    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2660        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2661    }
2662
2663    len = wrap_length32(env, len);
2664    if (len > 4096) {
2665        cc = 3;
2666        len = 4096;
2667    }
2668
2669    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2670    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2671        (env->psw.mask & PSW_MASK_PSTATE)) {
2672        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2673                      __func__);
2674        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2675    }
2676
2677    /* FIXME: Access using correct keys and AR-mode */
2678    if (len) {
2679        S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
2680                                         mmu_idx_from_as(src_as), ra);
2681        S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
2682                                          mmu_idx_from_as(dest_as), ra);
2683
2684        access_memmove(env, &desta, &srca, ra);
2685    }
2686
2687    return cc;
2688}
2689
2690/* Decode a Unicode character.  A return value < 0 indicates success, storing
2691   the UTF-32 result into OCHAR and the consumed input length into OLEN.  A
2692   return value >= 0 indicates failure and is the CC value to be returned.  */
2693typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2694                                 uint64_t ilen, bool enh_check, uintptr_t ra,
2695                                 uint32_t *ochar, uint32_t *olen);
2696
2697/* Encode a Unicode character.  A return value < 0 indicates success, storing
2698   the bytes into ADDR and the output length into OLEN.  A return value >= 0
2699   indicates failure and is the CC value to be returned.  */
2700typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2701                                 uint64_t ilen, uintptr_t ra, uint32_t c,
2702                                 uint32_t *olen);
2703
2704static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2705                       bool enh_check, uintptr_t ra,
2706                       uint32_t *ochar, uint32_t *olen)
2707{
2708    uint8_t s0, s1, s2, s3;
2709    uint32_t c, l;
2710
2711    if (ilen < 1) {
2712        return 0;
2713    }
2714    s0 = cpu_ldub_data_ra(env, addr, ra);
2715    if (s0 <= 0x7f) {
2716        /* one byte character */
2717        l = 1;
2718        c = s0;
2719    } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2720        /* invalid character */
2721        return 2;
2722    } else if (s0 <= 0xdf) {
2723        /* two byte character */
2724        l = 2;
2725        if (ilen < 2) {
2726            return 0;
2727        }
2728        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2729        c = s0 & 0x1f;
2730        c = (c << 6) | (s1 & 0x3f);
2731        if (enh_check && (s1 & 0xc0) != 0x80) {
2732            return 2;
2733        }
2734    } else if (s0 <= 0xef) {
2735        /* three byte character */
2736        l = 3;
2737        if (ilen < 3) {
2738            return 0;
2739        }
2740        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2741        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2742        c = s0 & 0x0f;
2743        c = (c << 6) | (s1 & 0x3f);
2744        c = (c << 6) | (s2 & 0x3f);
2745        /* Fold the byte-by-byte range descriptions in the Principles of
2746           Operation into tests against the full value: reject encodings that
2747           could have been smaller (below U+0800) and UTF-16 surrogates.  */
2748        if (enh_check
2749            && ((s1 & 0xc0) != 0x80
2750                || (s2 & 0xc0) != 0x80
2751                || c < 0x800
2752                || (c >= 0xd800 && c <= 0xdfff))) {
2753            return 2;
2754        }
2755    } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2756        /* four byte character */
2757        l = 4;
2758        if (ilen < 4) {
2759            return 0;
2760        }
2761        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2762        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2763        s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2764        c = s0 & 0x07;
2765        c = (c << 6) | (s1 & 0x3f);
2766        c = (c << 6) | (s2 & 0x3f);
2767        c = (c << 6) | (s3 & 0x3f);
2768        /* See above.  */
2769        if (enh_check
2770            && ((s1 & 0xc0) != 0x80
2771                || (s2 & 0xc0) != 0x80
2772                || (s3 & 0xc0) != 0x80
2773                || c < 0x010000
2774                || c > 0x10ffff)) {
2775            return 2;
2776        }
2777    } else {
2778        /* invalid character */
2779        return 2;
2780    }
2781
2782    *ochar = c;
2783    *olen = l;
2784    return -1;
2785}
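
/*
 * Editorial worked example for the three-byte path above, decoding
 * U+20AC (EURO SIGN) from the bytes 0xe2 0x82 0xac:
 *
 *     c = 0xe2 & 0x0f              = 0x02
 *     c = (c << 6) | (0x82 & 0x3f) = 0x082
 *     c = (c << 6) | (0xac & 0x3f) = 0x20ac
 */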
2786
2787static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2788                        bool enh_check, uintptr_t ra,
2789                        uint32_t *ochar, uint32_t *olen)
2790{
2791    uint16_t s0, s1;
2792    uint32_t c, l;
2793
2794    if (ilen < 2) {
2795        return 0;
2796    }
2797    s0 = cpu_lduw_data_ra(env, addr, ra);
2798    if ((s0 & 0xfc00) != 0xd800) {
2799        /* one word character */
2800        l = 2;
2801        c = s0;
2802    } else {
2803        /* two word character */
2804        l = 4;
2805        if (ilen < 4) {
2806            return 0;
2807        }
2808        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2809        c = extract32(s0, 6, 4) + 1;
2810        c = (c << 6) | (s0 & 0x3f);
2811        c = (c << 10) | (s1 & 0x3ff);
2812        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2813            /* invalid surrogate character */
2814            return 2;
2815        }
2816    }
2817
2818    *ochar = c;
2819    *olen = l;
2820    return -1;
2821}
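
/*
 * Editorial worked example for the surrogate path above, decoding
 * U+1F600 from the UTF-16 pair 0xd83d 0xde00:
 *
 *     c = extract32(0xd83d, 6, 4) + 1  = 0x0 + 1         = 0x1
 *     c = (c << 6) | (0xd83d & 0x3f)   = 0x40 | 0x3d     = 0x7d
 *     c = (c << 10) | (0xde00 & 0x3ff) = 0x1f400 | 0x200 = 0x1f600
 */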
2822
2823static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2824                        bool enh_check, uintptr_t ra,
2825                        uint32_t *ochar, uint32_t *olen)
2826{
2827    uint32_t c;
2828
2829    if (ilen < 4) {
2830        return 0;
2831    }
2832    c = cpu_ldl_data_ra(env, addr, ra);
2833    if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2834        /* invalid unicode character */
2835        return 2;
2836    }
2837
2838    *ochar = c;
2839    *olen = 4;
2840    return -1;
2841}
2842
2843static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2844                       uintptr_t ra, uint32_t c, uint32_t *olen)
2845{
2846    uint8_t d[4];
2847    uint32_t l, i;
2848
2849    if (c <= 0x7f) {
2850        /* one byte character */
2851        l = 1;
2852        d[0] = c;
2853    } else if (c <= 0x7ff) {
2854        /* two byte character */
2855        l = 2;
2856        d[1] = 0x80 | extract32(c, 0, 6);
2857        d[0] = 0xc0 | extract32(c, 6, 5);
2858    } else if (c <= 0xffff) {
2859        /* three byte character */
2860        l = 3;
2861        d[2] = 0x80 | extract32(c, 0, 6);
2862        d[1] = 0x80 | extract32(c, 6, 6);
2863        d[0] = 0xe0 | extract32(c, 12, 4);
2864    } else {
2865        /* four byte character */
2866        l = 4;
2867        d[3] = 0x80 | extract32(c, 0, 6);
2868        d[2] = 0x80 | extract32(c, 6, 6);
2869        d[1] = 0x80 | extract32(c, 12, 6);
2870        d[0] = 0xf0 | extract32(c, 18, 3);
2871    }
2872
2873    if (ilen < l) {
2874        return 1;
2875    }
2876    for (i = 0; i < l; ++i) {
2877        cpu_stb_data_ra(env, addr + i, d[i], ra);
2878    }
2879
2880    *olen = l;
2881    return -1;
2882}
2883
2884static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2885                        uintptr_t ra, uint32_t c, uint32_t *olen)
2886{
2887    uint16_t d0, d1;
2888
2889    if (c <= 0xffff) {
2890        /* one word character */
2891        if (ilen < 2) {
2892            return 1;
2893        }
2894        cpu_stw_data_ra(env, addr, c, ra);
2895        *olen = 2;
2896    } else {
2897        /* two word character */
2898        if (ilen < 4) {
2899            return 1;
2900        }
2901        d1 = 0xdc00 | extract32(c, 0, 10);
2902        d0 = 0xd800 | extract32(c, 10, 6);
2903        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2904        cpu_stw_data_ra(env, addr + 0, d0, ra);
2905        cpu_stw_data_ra(env, addr + 2, d1, ra);
2906        *olen = 4;
2907    }
2908
2909    return -1;
2910}
2911
2912static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2913                        uintptr_t ra, uint32_t c, uint32_t *olen)
2914{
2915    if (ilen < 4) {
2916        return 1;
2917    }
2918    cpu_stl_data_ra(env, addr, c, ra);
2919    *olen = 4;
2920    return -1;
2921}
2922
2923static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2924                                       uint32_t r2, uint32_t m3, uintptr_t ra,
2925                                       decode_unicode_fn decode,
2926                                       encode_unicode_fn encode)
2927{
2928    uint64_t dst = get_address(env, r1);
2929    uint64_t dlen = get_length(env, r1 + 1);
2930    uint64_t src = get_address(env, r2);
2931    uint64_t slen = get_length(env, r2 + 1);
2932    bool enh_check = m3 & 1;
2933    int cc, i;
2934
2935    /* Lest we fail to service interrupts in a timely manner, limit the
2936       amount of work we're willing to do.  For now, let's cap at 256.  */
2937    for (i = 0; i < 256; ++i) {
2938        uint32_t c, ilen, olen;
2939
2940        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2941        if (unlikely(cc >= 0)) {
2942            break;
2943        }
2944        cc = encode(env, dst, dlen, ra, c, &olen);
2945        if (unlikely(cc >= 0)) {
2946            break;
2947        }
2948
2949        src += ilen;
2950        slen -= ilen;
2951        dst += olen;
2952        dlen -= olen;
2953        cc = 3;
2954    }
2955
2956    set_address(env, r1, dst);
2957    set_length(env, r1 + 1, dlen);
2958    set_address(env, r2, src);
2959    set_length(env, r2 + 1, slen);
2960
2961    return cc;
2962}
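
/*
 * Editorial summary of the resulting condition codes: decode() returns 0
 * when the source is exhausted mid-character and 2 for an invalid
 * character; encode() returns 1 when the destination is full; and when
 * the 256-character cap expires, the cc 3 set at the bottom of each
 * iteration is returned so the guest re-executes the instruction.
 */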
2963
2964uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2965{
2966    return convert_unicode(env, r1, r2, m3, GETPC(),
2967                           decode_utf8, encode_utf16);
2968}
2969
2970uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2971{
2972    return convert_unicode(env, r1, r2, m3, GETPC(),
2973                           decode_utf8, encode_utf32);
2974}
2975
2976uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2977{
2978    return convert_unicode(env, r1, r2, m3, GETPC(),
2979                           decode_utf16, encode_utf8);
2980}
2981
2982uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2983{
2984    return convert_unicode(env, r1, r2, m3, GETPC(),
2985                           decode_utf16, encode_utf32);
2986}
2987
2988uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2989{
2990    return convert_unicode(env, r1, r2, m3, GETPC(),
2991                           decode_utf32, encode_utf8);
2992}
2993
2994uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2995{
2996    return convert_unicode(env, r1, r2, m3, GETPC(),
2997                           decode_utf32, encode_utf16);
2998}
2999
3000void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
3001                        uintptr_t ra)
3002{
3003    /* test the actual access, not just any access to the page due to LAP */
3004    while (len) {
3005        const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
3006        const uint64_t curlen = MIN(pagelen, len);
3007
3008        probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
3009        addr = wrap_address(env, addr + curlen);
3010        len -= curlen;
3011    }
3012}
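
/*
 * Editorial note on the page-chunking above: with TARGET_PAGE_MASK being
 * ~(page_size - 1), the expression -(addr | TARGET_PAGE_MASK) is the
 * number of bytes from addr to the end of its page.  E.g. with 4 KiB
 * pages and addr = 0x1ff8: addr | mask = 0xff...f8, whose negation is 8,
 * so at most one page is probed per iteration.
 */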
3013
3014void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
3015{
3016    probe_write_access(env, addr, len, GETPC());
3017}
3018