qemu/target/s390x/tcg/mem_helper.c
<<
>>
Prefs
   1/*
   2 *  S/390 memory access helper routines
   3 *
   4 *  Copyright (c) 2009 Ulrich Hecht
   5 *  Copyright (c) 2009 Alexander Graf
   6 *
   7 * This library is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU Lesser General Public
   9 * License as published by the Free Software Foundation; either
  10 * version 2.1 of the License, or (at your option) any later version.
  11 *
  12 * This library is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 * Lesser General Public License for more details.
  16 *
  17 * You should have received a copy of the GNU Lesser General Public
  18 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21#include "qemu/osdep.h"
  22#include "qemu/log.h"
  23#include "cpu.h"
  24#include "s390x-internal.h"
  25#include "tcg_s390x.h"
  26#include "exec/helper-proto.h"
  27#include "exec/exec-all.h"
  28#include "exec/cpu_ldst.h"
  29#include "qemu/int128.h"
  30#include "qemu/atomic128.h"
  31#include "trace.h"
  32
  33#if !defined(CONFIG_USER_ONLY)
  34#include "hw/s390x/storage-keys.h"
  35#include "hw/boards.h"
  36#endif
  37
  38/*****************************************************************************/
  39/* Softmmu support */
  40
  41/* #define DEBUG_HELPER */
  42#ifdef DEBUG_HELPER
  43#define HELPER_LOG(x...) qemu_log(x)
  44#else
  45#define HELPER_LOG(x...)
  46#endif
  47
  48static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
  49{
  50    uint16_t pkm = env->cregs[3] >> 16;
  51
  52    if (env->psw.mask & PSW_MASK_PSTATE) {
  53        /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
  54        return pkm & (0x80 >> psw_key);
  55    }
  56    return true;
  57}
  58
  59static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
  60                                   uint64_t src, uint32_t len)
  61{
  62    if (!len || src == dest) {
  63        return false;
  64    }
  65    /* Take care of wrapping at the end of address space. */
  66    if (unlikely(wrap_address(env, src + len - 1) < src)) {
  67        return dest > src || dest <= wrap_address(env, src + len - 1);
  68    }
  69    return dest > src && dest <= src + len - 1;
  70}
  71
  72/* Trigger a SPECIFICATION exception if an address or a length is not
  73   naturally aligned.  */
  74static inline void check_alignment(CPUS390XState *env, uint64_t v,
  75                                   int wordsize, uintptr_t ra)
  76{
  77    if (v % wordsize) {
  78        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
  79    }
  80}
  81
  82/* Load a value from memory according to its size.  */
  83static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
  84                                           int wordsize, uintptr_t ra)
  85{
  86    switch (wordsize) {
  87    case 1:
  88        return cpu_ldub_data_ra(env, addr, ra);
  89    case 2:
  90        return cpu_lduw_data_ra(env, addr, ra);
  91    default:
  92        abort();
  93    }
  94}
  95
  96/* Store a to memory according to its size.  */
  97static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
  98                                      uint64_t value, int wordsize,
  99                                      uintptr_t ra)
 100{
 101    switch (wordsize) {
 102    case 1:
 103        cpu_stb_data_ra(env, addr, value, ra);
 104        break;
 105    case 2:
 106        cpu_stw_data_ra(env, addr, value, ra);
 107        break;
 108    default:
 109        abort();
 110    }
 111}
 112
 113/* An access covers at most 4096 bytes and therefore at most two pages. */
 114typedef struct S390Access {
 115    target_ulong vaddr1;
 116    target_ulong vaddr2;
 117    char *haddr1;
 118    char *haddr2;
 119    uint16_t size1;
 120    uint16_t size2;
 121    /*
 122     * If we can't access the host page directly, we'll have to do I/O access
 123     * via ld/st helpers. These are internal details, so we store the
 124     * mmu idx to do the access here instead of passing it around in the
 125     * helpers. Maybe, one day we can get rid of ld/st access - once we can
 126     * handle TLB_NOTDIRTY differently. We don't expect these special accesses
 127     * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
 128     * pages, we might trigger a new MMU translation - very unlikely that
 129     * the mapping changes in between and we would trigger a fault.
 130     */
 131    int mmu_idx;
 132} S390Access;
 133
 134/*
 135 * With nonfault=1, return the PGM_ exception that would have been injected
 136 * into the guest; return 0 if no exception was detected.
 137 *
 138 * For !CONFIG_USER_ONLY, the TEC is stored stored to env->tlb_fill_tec.
 139 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 140 */
 141static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
 142                             MMUAccessType access_type, int mmu_idx,
 143                             bool nonfault, void **phost, uintptr_t ra)
 144{
 145#if defined(CONFIG_USER_ONLY)
 146    return probe_access_flags(env, addr, access_type, mmu_idx,
 147                              nonfault, phost, ra);
 148#else
 149    int flags;
 150
 151    env->tlb_fill_exc = 0;
 152    flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
 153                               ra);
 154    if (env->tlb_fill_exc) {
 155        return env->tlb_fill_exc;
 156    }
 157
 158    if (unlikely(flags & TLB_WATCHPOINT)) {
 159        /* S390 does not presently use transaction attributes. */
 160        cpu_check_watchpoint(env_cpu(env), addr, size,
 161                             MEMTXATTRS_UNSPECIFIED,
 162                             (access_type == MMU_DATA_STORE
 163                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
 164    }
 165    return 0;
 166#endif
 167}
 168
 169static int access_prepare_nf(S390Access *access, CPUS390XState *env,
 170                             bool nonfault, vaddr vaddr1, int size,
 171                             MMUAccessType access_type,
 172                             int mmu_idx, uintptr_t ra)
 173{
 174    void *haddr1, *haddr2 = NULL;
 175    int size1, size2, exc;
 176    vaddr vaddr2 = 0;
 177
 178    assert(size > 0 && size <= 4096);
 179
 180    size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
 181    size2 = size - size1;
 182
 183    exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
 184                            &haddr1, ra);
 185    if (exc) {
 186        return exc;
 187    }
 188    if (unlikely(size2)) {
 189        /* The access crosses page boundaries. */
 190        vaddr2 = wrap_address(env, vaddr1 + size1);
 191        exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
 192                                nonfault, &haddr2, ra);
 193        if (exc) {
 194            return exc;
 195        }
 196    }
 197
 198    *access = (S390Access) {
 199        .vaddr1 = vaddr1,
 200        .vaddr2 = vaddr2,
 201        .haddr1 = haddr1,
 202        .haddr2 = haddr2,
 203        .size1 = size1,
 204        .size2 = size2,
 205        .mmu_idx = mmu_idx
 206    };
 207    return 0;
 208}
 209
 210static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
 211                                 MMUAccessType access_type, int mmu_idx,
 212                                 uintptr_t ra)
 213{
 214    S390Access ret;
 215    int exc = access_prepare_nf(&ret, env, false, vaddr, size,
 216                                access_type, mmu_idx, ra);
 217    assert(!exc);
 218    return ret;
 219}
 220
 221/* Helper to handle memset on a single page. */
 222static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
 223                             uint8_t byte, uint16_t size, int mmu_idx,
 224                             uintptr_t ra)
 225{
 226#ifdef CONFIG_USER_ONLY
 227    g_assert(haddr);
 228    memset(haddr, byte, size);
 229#else
 230    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
 231    int i;
 232
 233    if (likely(haddr)) {
 234        memset(haddr, byte, size);
 235    } else {
 236        /*
 237         * Do a single access and test if we can then get access to the
 238         * page. This is especially relevant to speed up TLB_NOTDIRTY.
 239         */
 240        g_assert(size > 0);
 241        cpu_stb_mmu(env, vaddr, byte, oi, ra);
 242        haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
 243        if (likely(haddr)) {
 244            memset(haddr + 1, byte, size - 1);
 245        } else {
 246            for (i = 1; i < size; i++) {
 247                cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
 248            }
 249        }
 250    }
 251#endif
 252}
 253
 254static void access_memset(CPUS390XState *env, S390Access *desta,
 255                          uint8_t byte, uintptr_t ra)
 256{
 257
 258    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
 259                     desta->mmu_idx, ra);
 260    if (likely(!desta->size2)) {
 261        return;
 262    }
 263    do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
 264                     desta->mmu_idx, ra);
 265}
 266
 267static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
 268                                  int offset, int mmu_idx, uintptr_t ra)
 269{
 270#ifdef CONFIG_USER_ONLY
 271    return ldub_p(*haddr + offset);
 272#else
 273    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
 274    uint8_t byte;
 275
 276    if (likely(*haddr)) {
 277        return ldub_p(*haddr + offset);
 278    }
 279    /*
 280     * Do a single access and test if we can then get access to the
 281     * page. This is especially relevant to speed up TLB_NOTDIRTY.
 282     */
 283    byte = cpu_ldb_mmu(env, vaddr + offset, oi, ra);
 284    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
 285    return byte;
 286#endif
 287}
 288
 289static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
 290                               int offset, uintptr_t ra)
 291{
 292    if (offset < access->size1) {
 293        return do_access_get_byte(env, access->vaddr1, &access->haddr1,
 294                                  offset, access->mmu_idx, ra);
 295    }
 296    return do_access_get_byte(env, access->vaddr2, &access->haddr2,
 297                              offset - access->size1, access->mmu_idx, ra);
 298}
 299
 300static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
 301                               int offset, uint8_t byte, int mmu_idx,
 302                               uintptr_t ra)
 303{
 304#ifdef CONFIG_USER_ONLY
 305    stb_p(*haddr + offset, byte);
 306#else
 307    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
 308
 309    if (likely(*haddr)) {
 310        stb_p(*haddr + offset, byte);
 311        return;
 312    }
 313    /*
 314     * Do a single access and test if we can then get access to the
 315     * page. This is especially relevant to speed up TLB_NOTDIRTY.
 316     */
 317    cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
 318    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
 319#endif
 320}
 321
 322static void access_set_byte(CPUS390XState *env, S390Access *access,
 323                            int offset, uint8_t byte, uintptr_t ra)
 324{
 325    if (offset < access->size1) {
 326        do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
 327                           access->mmu_idx, ra);
 328    } else {
 329        do_access_set_byte(env, access->vaddr2, &access->haddr2,
 330                           offset - access->size1, byte, access->mmu_idx, ra);
 331    }
 332}
 333
 334/*
 335 * Move data with the same semantics as memmove() in case ranges don't overlap
 336 * or src > dest. Undefined behavior on destructive overlaps.
 337 */
 338static void access_memmove(CPUS390XState *env, S390Access *desta,
 339                           S390Access *srca, uintptr_t ra)
 340{
 341    int diff;
 342
 343    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);
 344
 345    /* Fallback to slow access in case we don't have access to all host pages */
 346    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
 347                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
 348        int i;
 349
 350        for (i = 0; i < desta->size1 + desta->size2; i++) {
 351            uint8_t byte = access_get_byte(env, srca, i, ra);
 352
 353            access_set_byte(env, desta, i, byte, ra);
 354        }
 355        return;
 356    }
 357
 358    if (srca->size1 == desta->size1) {
 359        memmove(desta->haddr1, srca->haddr1, srca->size1);
 360        if (unlikely(srca->size2)) {
 361            memmove(desta->haddr2, srca->haddr2, srca->size2);
 362        }
 363    } else if (srca->size1 < desta->size1) {
 364        diff = desta->size1 - srca->size1;
 365        memmove(desta->haddr1, srca->haddr1, srca->size1);
 366        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
 367        if (likely(desta->size2)) {
 368            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
 369        }
 370    } else {
 371        diff = srca->size1 - desta->size1;
 372        memmove(desta->haddr1, srca->haddr1, desta->size1);
 373        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
 374        if (likely(srca->size2)) {
 375            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
 376        }
 377    }
 378}
 379
 380static int mmu_idx_from_as(uint8_t as)
 381{
 382    switch (as) {
 383    case AS_PRIMARY:
 384        return MMU_PRIMARY_IDX;
 385    case AS_SECONDARY:
 386        return MMU_SECONDARY_IDX;
 387    case AS_HOME:
 388        return MMU_HOME_IDX;
 389    default:
 390        /* FIXME AS_ACCREG */
 391        g_assert_not_reached();
 392    }
 393}
 394
 395/* and on array */
 396static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
 397                             uint64_t src, uintptr_t ra)
 398{
 399    const int mmu_idx = cpu_mmu_index(env, false);
 400    S390Access srca1, srca2, desta;
 401    uint32_t i;
 402    uint8_t c = 0;
 403
 404    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
 405               __func__, l, dest, src);
 406
 407    /* NC always processes one more byte than specified - maximum is 256 */
 408    l++;
 409
 410    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
 411    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
 412    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 413    for (i = 0; i < l; i++) {
 414        const uint8_t x = access_get_byte(env, &srca1, i, ra) &
 415                          access_get_byte(env, &srca2, i, ra);
 416
 417        c |= x;
 418        access_set_byte(env, &desta, i, x, ra);
 419    }
 420    return c != 0;
 421}
 422
 423uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
 424                    uint64_t src)
 425{
 426    return do_helper_nc(env, l, dest, src, GETPC());
 427}
 428
 429/* xor on array */
 430static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
 431                             uint64_t src, uintptr_t ra)
 432{
 433    const int mmu_idx = cpu_mmu_index(env, false);
 434    S390Access srca1, srca2, desta;
 435    uint32_t i;
 436    uint8_t c = 0;
 437
 438    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
 439               __func__, l, dest, src);
 440
 441    /* XC always processes one more byte than specified - maximum is 256 */
 442    l++;
 443
 444    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
 445    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
 446    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 447
 448    /* xor with itself is the same as memset(0) */
 449    if (src == dest) {
 450        access_memset(env, &desta, 0, ra);
 451        return 0;
 452    }
 453
 454    for (i = 0; i < l; i++) {
 455        const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
 456                          access_get_byte(env, &srca2, i, ra);
 457
 458        c |= x;
 459        access_set_byte(env, &desta, i, x, ra);
 460    }
 461    return c != 0;
 462}
 463
 464uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
 465                    uint64_t src)
 466{
 467    return do_helper_xc(env, l, dest, src, GETPC());
 468}
 469
 470/* or on array */
 471static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
 472                             uint64_t src, uintptr_t ra)
 473{
 474    const int mmu_idx = cpu_mmu_index(env, false);
 475    S390Access srca1, srca2, desta;
 476    uint32_t i;
 477    uint8_t c = 0;
 478
 479    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
 480               __func__, l, dest, src);
 481
 482    /* OC always processes one more byte than specified - maximum is 256 */
 483    l++;
 484
 485    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
 486    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
 487    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 488    for (i = 0; i < l; i++) {
 489        const uint8_t x = access_get_byte(env, &srca1, i, ra) |
 490                          access_get_byte(env, &srca2, i, ra);
 491
 492        c |= x;
 493        access_set_byte(env, &desta, i, x, ra);
 494    }
 495    return c != 0;
 496}
 497
 498uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
 499                    uint64_t src)
 500{
 501    return do_helper_oc(env, l, dest, src, GETPC());
 502}
 503
 504/* memmove */
 505static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
 506                              uint64_t src, uintptr_t ra)
 507{
 508    const int mmu_idx = cpu_mmu_index(env, false);
 509    S390Access srca, desta;
 510    uint32_t i;
 511
 512    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
 513               __func__, l, dest, src);
 514
 515    /* MVC always copies one more byte than specified - maximum is 256 */
 516    l++;
 517
 518    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
 519    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 520
 521    /*
 522     * "When the operands overlap, the result is obtained as if the operands
 523     * were processed one byte at a time". Only non-destructive overlaps
 524     * behave like memmove().
 525     */
 526    if (dest == src + 1) {
 527        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
 528    } else if (!is_destructive_overlap(env, dest, src, l)) {
 529        access_memmove(env, &desta, &srca, ra);
 530    } else {
 531        for (i = 0; i < l; i++) {
 532            uint8_t byte = access_get_byte(env, &srca, i, ra);
 533
 534            access_set_byte(env, &desta, i, byte, ra);
 535        }
 536    }
 537
 538    return env->cc_op;
 539}
 540
 541void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 542{
 543    do_helper_mvc(env, l, dest, src, GETPC());
 544}
 545
 546/* move right to left */
 547void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
 548{
 549    const int mmu_idx = cpu_mmu_index(env, false);
 550    const uint64_t ra = GETPC();
 551    S390Access srca, desta;
 552    int32_t i;
 553
 554    /* MVCRL always copies one more byte than specified - maximum is 256 */
 555    l++;
 556
 557    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
 558    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 559
 560    for (i = l - 1; i >= 0; i--) {
 561        uint8_t byte = access_get_byte(env, &srca, i, ra);
 562        access_set_byte(env, &desta, i, byte, ra);
 563    }
 564}
 565
 566/* move inverse  */
 567void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 568{
 569    const int mmu_idx = cpu_mmu_index(env, false);
 570    S390Access srca, desta;
 571    uintptr_t ra = GETPC();
 572    int i;
 573
 574    /* MVCIN always copies one more byte than specified - maximum is 256 */
 575    l++;
 576
 577    src = wrap_address(env, src - l + 1);
 578    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
 579    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 580    for (i = 0; i < l; i++) {
 581        const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
 582
 583        access_set_byte(env, &desta, i, x, ra);
 584    }
 585}
 586
 587/* move numerics  */
 588void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 589{
 590    const int mmu_idx = cpu_mmu_index(env, false);
 591    S390Access srca1, srca2, desta;
 592    uintptr_t ra = GETPC();
 593    int i;
 594
 595    /* MVN always copies one more byte than specified - maximum is 256 */
 596    l++;
 597
 598    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
 599    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
 600    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 601    for (i = 0; i < l; i++) {
 602        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
 603                          (access_get_byte(env, &srca2, i, ra) & 0xf0);
 604
 605        access_set_byte(env, &desta, i, x, ra);
 606    }
 607}
 608
 609/* move with offset  */
 610void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 611{
 612    const int mmu_idx = cpu_mmu_index(env, false);
 613    /* MVO always processes one more byte than specified - maximum is 16 */
 614    const int len_dest = (l >> 4) + 1;
 615    const int len_src = (l & 0xf) + 1;
 616    uintptr_t ra = GETPC();
 617    uint8_t byte_dest, byte_src;
 618    S390Access srca, desta;
 619    int i, j;
 620
 621    srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
 622    desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);
 623
 624    /* Handle rightmost byte */
 625    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
 626    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
 627    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
 628    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);
 629
 630    /* Process remaining bytes from right to left */
 631    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
 632        byte_dest = byte_src >> 4;
 633        if (j >= 0) {
 634            byte_src = access_get_byte(env, &srca, j, ra);
 635        } else {
 636            byte_src = 0;
 637        }
 638        byte_dest |= byte_src << 4;
 639        access_set_byte(env, &desta, i, byte_dest, ra);
 640    }
 641}
 642
 643/* move zones  */
 644void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 645{
 646    const int mmu_idx = cpu_mmu_index(env, false);
 647    S390Access srca1, srca2, desta;
 648    uintptr_t ra = GETPC();
 649    int i;
 650
 651    /* MVZ always copies one more byte than specified - maximum is 256 */
 652    l++;
 653
 654    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
 655    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
 656    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 657    for (i = 0; i < l; i++) {
 658        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
 659                          (access_get_byte(env, &srca2, i, ra) & 0x0f);
 660
 661        access_set_byte(env, &desta, i, x, ra);
 662    }
 663}
 664
 665/* compare unsigned byte arrays */
 666static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
 667                              uint64_t s2, uintptr_t ra)
 668{
 669    uint32_t i;
 670    uint32_t cc = 0;
 671
 672    HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
 673               __func__, l, s1, s2);
 674
 675    for (i = 0; i <= l; i++) {
 676        uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
 677        uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
 678        HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
 679        if (x < y) {
 680            cc = 1;
 681            break;
 682        } else if (x > y) {
 683            cc = 2;
 684            break;
 685        }
 686    }
 687
 688    HELPER_LOG("\n");
 689    return cc;
 690}
 691
 692uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
 693{
 694    return do_helper_clc(env, l, s1, s2, GETPC());
 695}
 696
 697/* compare logical under mask */
 698uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
 699                     uint64_t addr)
 700{
 701    uintptr_t ra = GETPC();
 702    uint32_t cc = 0;
 703
 704    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
 705               mask, addr);
 706
 707    while (mask) {
 708        if (mask & 8) {
 709            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
 710            uint8_t r = extract32(r1, 24, 8);
 711            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
 712                       addr);
 713            if (r < d) {
 714                cc = 1;
 715                break;
 716            } else if (r > d) {
 717                cc = 2;
 718                break;
 719            }
 720            addr++;
 721        }
 722        mask = (mask << 1) & 0xf;
 723        r1 <<= 8;
 724    }
 725
 726    HELPER_LOG("\n");
 727    return cc;
 728}
 729
 730static inline uint64_t get_address(CPUS390XState *env, int reg)
 731{
 732    return wrap_address(env, env->regs[reg]);
 733}
 734
 735/*
 736 * Store the address to the given register, zeroing out unused leftmost
 737 * bits in bit positions 32-63 (24-bit and 31-bit mode only).
 738 */
 739static inline void set_address_zero(CPUS390XState *env, int reg,
 740                                    uint64_t address)
 741{
 742    if (env->psw.mask & PSW_MASK_64) {
 743        env->regs[reg] = address;
 744    } else {
 745        if (!(env->psw.mask & PSW_MASK_32)) {
 746            address &= 0x00ffffff;
 747        } else {
 748            address &= 0x7fffffff;
 749        }
 750        env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
 751    }
 752}
 753
 754static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
 755{
 756    if (env->psw.mask & PSW_MASK_64) {
 757        /* 64-Bit mode */
 758        env->regs[reg] = address;
 759    } else {
 760        if (!(env->psw.mask & PSW_MASK_32)) {
 761            /* 24-Bit mode. According to the PoO it is implementation
 762            dependent if bits 32-39 remain unchanged or are set to
 763            zeros.  Choose the former so that the function can also be
 764            used for TRT.  */
 765            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
 766        } else {
 767            /* 31-Bit mode. According to the PoO it is implementation
 768            dependent if bit 32 remains unchanged or is set to zero.
 769            Choose the latter so that the function can also be used for
 770            TRT.  */
 771            address &= 0x7fffffff;
 772            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
 773        }
 774    }
 775}
 776
 777static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
 778{
 779    if (!(env->psw.mask & PSW_MASK_64)) {
 780        return (uint32_t)length;
 781    }
 782    return length;
 783}
 784
 785static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
 786{
 787    if (!(env->psw.mask & PSW_MASK_64)) {
 788        /* 24-Bit and 31-Bit mode */
 789        length &= 0x7fffffff;
 790    }
 791    return length;
 792}
 793
 794static inline uint64_t get_length(CPUS390XState *env, int reg)
 795{
 796    return wrap_length31(env, env->regs[reg]);
 797}
 798
 799static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
 800{
 801    if (env->psw.mask & PSW_MASK_64) {
 802        /* 64-Bit mode */
 803        env->regs[reg] = length;
 804    } else {
 805        /* 24-Bit and 31-Bit mode */
 806        env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
 807    }
 808}
 809
 810/* search string (c is byte to search, r2 is string, r1 end of string) */
 811void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
 812{
 813    uintptr_t ra = GETPC();
 814    uint64_t end, str;
 815    uint32_t len;
 816    uint8_t v, c = env->regs[0];
 817
 818    /* Bits 32-55 must contain all 0.  */
 819    if (env->regs[0] & 0xffffff00u) {
 820        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
 821    }
 822
 823    str = get_address(env, r2);
 824    end = get_address(env, r1);
 825
 826    /* Lest we fail to service interrupts in a timely manner, limit the
 827       amount of work we're willing to do.  For now, let's cap at 8k.  */
 828    for (len = 0; len < 0x2000; ++len) {
 829        if (str + len == end) {
 830            /* Character not found.  R1 & R2 are unmodified.  */
 831            env->cc_op = 2;
 832            return;
 833        }
 834        v = cpu_ldub_data_ra(env, str + len, ra);
 835        if (v == c) {
 836            /* Character found.  Set R1 to the location; R2 is unmodified.  */
 837            env->cc_op = 1;
 838            set_address(env, r1, str + len);
 839            return;
 840        }
 841    }
 842
 843    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
 844    env->cc_op = 3;
 845    set_address(env, r2, str + len);
 846}
 847
 848void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
 849{
 850    uintptr_t ra = GETPC();
 851    uint32_t len;
 852    uint16_t v, c = env->regs[0];
 853    uint64_t end, str, adj_end;
 854
 855    /* Bits 32-47 of R0 must be zero.  */
 856    if (env->regs[0] & 0xffff0000u) {
 857        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
 858    }
 859
 860    str = get_address(env, r2);
 861    end = get_address(env, r1);
 862
 863    /* If the LSB of the two addresses differ, use one extra byte.  */
 864    adj_end = end + ((str ^ end) & 1);
 865
 866    /* Lest we fail to service interrupts in a timely manner, limit the
 867       amount of work we're willing to do.  For now, let's cap at 8k.  */
 868    for (len = 0; len < 0x2000; len += 2) {
 869        if (str + len == adj_end) {
 870            /* End of input found.  */
 871            env->cc_op = 2;
 872            return;
 873        }
 874        v = cpu_lduw_data_ra(env, str + len, ra);
 875        if (v == c) {
 876            /* Character found.  Set R1 to the location; R2 is unmodified.  */
 877            env->cc_op = 1;
 878            set_address(env, r1, str + len);
 879            return;
 880        }
 881    }
 882
 883    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
 884    env->cc_op = 3;
 885    set_address(env, r2, str + len);
 886}
 887
 888/* unsigned string compare (c is string terminator) */
 889uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
 890{
 891    uintptr_t ra = GETPC();
 892    uint32_t len;
 893
 894    c = c & 0xff;
 895    s1 = wrap_address(env, s1);
 896    s2 = wrap_address(env, s2);
 897
 898    /* Lest we fail to service interrupts in a timely manner, limit the
 899       amount of work we're willing to do.  For now, let's cap at 8k.  */
 900    for (len = 0; len < 0x2000; ++len) {
 901        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
 902        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
 903        if (v1 == v2) {
 904            if (v1 == c) {
 905                /* Equal.  CC=0, and don't advance the registers.  */
 906                env->cc_op = 0;
 907                env->retxl = s2;
 908                return s1;
 909            }
 910        } else {
 911            /* Unequal.  CC={1,2}, and advance the registers.  Note that
 912               the terminator need not be zero, but the string that contains
 913               the terminator is by definition "low".  */
 914            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
 915            env->retxl = s2 + len;
 916            return s1 + len;
 917        }
 918    }
 919
 920    /* CPU-determined bytes equal; advance the registers.  */
 921    env->cc_op = 3;
 922    env->retxl = s2 + len;
 923    return s1 + len;
 924}
 925
 926/* move page */
 927uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
 928{
 929    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
 930    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
 931    const int mmu_idx = cpu_mmu_index(env, false);
 932    const bool f = extract64(r0, 11, 1);
 933    const bool s = extract64(r0, 10, 1);
 934    const bool cco = extract64(r0, 8, 1);
 935    uintptr_t ra = GETPC();
 936    S390Access srca, desta;
 937    int exc;
 938
 939    if ((f && s) || extract64(r0, 12, 4)) {
 940        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
 941    }
 942
 943    /*
 944     * We always manually handle exceptions such that we can properly store
 945     * r1/r2 to the lowcore on page-translation exceptions.
 946     *
 947     * TODO: Access key handling
 948     */
 949    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
 950                            MMU_DATA_LOAD, mmu_idx, ra);
 951    if (exc) {
 952        if (cco) {
 953            return 2;
 954        }
 955        goto inject_exc;
 956    }
 957    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
 958                            MMU_DATA_STORE, mmu_idx, ra);
 959    if (exc) {
 960        if (cco && exc != PGM_PROTECTION) {
 961            return 1;
 962        }
 963        goto inject_exc;
 964    }
 965    access_memmove(env, &desta, &srca, ra);
 966    return 0; /* data moved */
 967inject_exc:
 968#if !defined(CONFIG_USER_ONLY)
 969    if (exc != PGM_ADDRESSING) {
 970        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
 971                 env->tlb_fill_tec);
 972    }
 973    if (exc == PGM_PAGE_TRANS) {
 974        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
 975                 r1 << 4 | r2);
 976    }
 977#endif
 978    tcg_s390_program_interrupt(env, exc, ra);
 979}
 980
 981/* string copy */
 982uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
 983{
 984    const int mmu_idx = cpu_mmu_index(env, false);
 985    const uint64_t d = get_address(env, r1);
 986    const uint64_t s = get_address(env, r2);
 987    const uint8_t c = env->regs[0];
 988    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
 989    S390Access srca, desta;
 990    uintptr_t ra = GETPC();
 991    int i;
 992
 993    if (env->regs[0] & 0xffffff00ull) {
 994        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
 995    }
 996
 997    /*
 998     * Our access should not exceed single pages, as we must not report access
 999     * exceptions exceeding the actually copied range (which we don't know at
1000     * this point). We might over-indicate watchpoints within the pages
1001     * (if we ever care, we have to limit processing to a single byte).
1002     */
1003    srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
1004    desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
1005    for (i = 0; i < len; i++) {
1006        const uint8_t v = access_get_byte(env, &srca, i, ra);
1007
1008        access_set_byte(env, &desta, i, v, ra);
1009        if (v == c) {
1010            set_address_zero(env, r1, d + i);
1011            return 1;
1012        }
1013    }
1014    set_address_zero(env, r1, d + len);
1015    set_address_zero(env, r2, s + len);
1016    return 3;
1017}
1018
1019/* load access registers r1 to r3 from memory at a2 */
1020void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1021{
1022    uintptr_t ra = GETPC();
1023    int i;
1024
1025    if (a2 & 0x3) {
1026        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1027    }
1028
1029    for (i = r1;; i = (i + 1) % 16) {
1030        env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1031        a2 += 4;
1032
1033        if (i == r3) {
1034            break;
1035        }
1036    }
1037}
1038
1039/* store access registers r1 to r3 in memory at a2 */
1040void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1041{
1042    uintptr_t ra = GETPC();
1043    int i;
1044
1045    if (a2 & 0x3) {
1046        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1047    }
1048
1049    for (i = r1;; i = (i + 1) % 16) {
1050        cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1051        a2 += 4;
1052
1053        if (i == r3) {
1054            break;
1055        }
1056    }
1057}
1058
/*
 * move long helper
 *
 * Performs a single step of a padded move: either one chunk is copied from
 * *src to *dest, or one chunk of *dest is padded - never both, and a chunk
 * never crosses a page boundary of an operand it touches.  The addresses
 * and lengths passed by reference are updated to reflect the work done.
 *
 * With @wordsize == 1 the padding is done by handing @pad to
 * access_memset().  With any other @wordsize the low byte and the high
 * byte of @pad are stored alternately; the parity of the remaining
 * destination length picks which one.
 *
 * Returns 3 while destination bytes remain after this step, otherwise the
 * condition code derived from the lengths on entry (0: equal,
 * 1: destination shorter, 2: destination longer).
 */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* Chunk size: remaining destination, capped at the end of its page. */
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    /* The final condition code depends only on the lengths on entry. */
    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* Nothing to store at all. */
    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        *dest = wrap_address(env, *dest + len);
    }

    /* cc 3 asks for another invocation while destination bytes remain. */
    return *destlen ? 3 : cc;
}
1118
/*
 * move long
 *
 * Operands: destination address in r1 with its 24-bit length in r1 + 1,
 * source address in r2 with its 24-bit length in r2 + 1; the pad byte is
 * taken from bits 24-31 of register r2 + 1.
 *
 * The work is done in page-bounded chunks, and all four registers are
 * written back after every chunk so that the helper can leave through
 * cpu_loop_exit_restore() and be re-entered later.
 *
 * Returns 3 on a destructive overlap (nothing is moved), otherwise 0, 1 or
 * 2 for a destination length equal to, less than or greater than the
 * source length on entry.
 */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    /* The condition code is fixed up front, from the lengths on entry. */
    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            /* Source exhausted: fill the destination with the pad byte. */
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memset(env, &desta, pad, ra);
        } else {
            /* Also stop at the end of the source and of its page. */
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
                                  ra);
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}
1192
1193/* move long extended */
1194uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1195                       uint32_t r3)
1196{
1197    uintptr_t ra = GETPC();
1198    uint64_t destlen = get_length(env, r1 + 1);
1199    uint64_t dest = get_address(env, r1);
1200    uint64_t srclen = get_length(env, r3 + 1);
1201    uint64_t src = get_address(env, r3);
1202    uint8_t pad = a2;
1203    uint32_t cc;
1204
1205    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1206
1207    set_length(env, r1 + 1, destlen);
1208    set_length(env, r3 + 1, srclen);
1209    set_address(env, r1, dest);
1210    set_address(env, r3, src);
1211
1212    return cc;
1213}
1214
1215/* move long unicode */
1216uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1217                       uint32_t r3)
1218{
1219    uintptr_t ra = GETPC();
1220    uint64_t destlen = get_length(env, r1 + 1);
1221    uint64_t dest = get_address(env, r1);
1222    uint64_t srclen = get_length(env, r3 + 1);
1223    uint64_t src = get_address(env, r3);
1224    uint16_t pad = a2;
1225    uint32_t cc;
1226
1227    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1228
1229    set_length(env, r1 + 1, destlen);
1230    set_length(env, r3 + 1, srclen);
1231    set_address(env, r1, dest);
1232    set_address(env, r3, src);
1233
1234    return cc;
1235}
1236
1237/* compare logical long helper */
1238static inline uint32_t do_clcl(CPUS390XState *env,
1239                               uint64_t *src1, uint64_t *src1len,
1240                               uint64_t *src3, uint64_t *src3len,
1241                               uint16_t pad, uint64_t limit,
1242                               int wordsize, uintptr_t ra)
1243{
1244    uint64_t len = MAX(*src1len, *src3len);
1245    uint32_t cc = 0;
1246
1247    check_alignment(env, *src1len | *src3len, wordsize, ra);
1248
1249    if (!len) {
1250        return cc;
1251    }
1252
1253    /* Lest we fail to service interrupts in a timely manner, limit the
1254       amount of work we're willing to do.  */
1255    if (len > limit) {
1256        len = limit;
1257        cc = 3;
1258    }
1259
1260    for (; len; len -= wordsize) {
1261        uint16_t v1 = pad;
1262        uint16_t v3 = pad;
1263
1264        if (*src1len) {
1265            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
1266        }
1267        if (*src3len) {
1268            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
1269        }
1270
1271        if (v1 != v3) {
1272            cc = (v1 < v3) ? 1 : 2;
1273            break;
1274        }
1275
1276        if (*src1len) {
1277            *src1 += wordsize;
1278            *src1len -= wordsize;
1279        }
1280        if (*src3len) {
1281            *src3 += wordsize;
1282            *src3len -= wordsize;
1283        }
1284    }
1285
1286    return cc;
1287}
1288
1289
1290/* compare logical long */
1291uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1292{
1293    uintptr_t ra = GETPC();
1294    uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1295    uint64_t src1 = get_address(env, r1);
1296    uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1297    uint64_t src3 = get_address(env, r2);
1298    uint8_t pad = env->regs[r2 + 1] >> 24;
1299    uint32_t cc;
1300
1301    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1302
1303    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1304    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1305    set_address(env, r1, src1);
1306    set_address(env, r2, src3);
1307
1308    return cc;
1309}
1310
1311/* compare logical long extended memcompare insn with padding */
1312uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1313                       uint32_t r3)
1314{
1315    uintptr_t ra = GETPC();
1316    uint64_t src1len = get_length(env, r1 + 1);
1317    uint64_t src1 = get_address(env, r1);
1318    uint64_t src3len = get_length(env, r3 + 1);
1319    uint64_t src3 = get_address(env, r3);
1320    uint8_t pad = a2;
1321    uint32_t cc;
1322
1323    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1324
1325    set_length(env, r1 + 1, src1len);
1326    set_length(env, r3 + 1, src3len);
1327    set_address(env, r1, src1);
1328    set_address(env, r3, src3);
1329
1330    return cc;
1331}
1332
1333/* compare logical long unicode memcompare insn with padding */
1334uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1335                       uint32_t r3)
1336{
1337    uintptr_t ra = GETPC();
1338    uint64_t src1len = get_length(env, r1 + 1);
1339    uint64_t src1 = get_address(env, r1);
1340    uint64_t src3len = get_length(env, r3 + 1);
1341    uint64_t src3 = get_address(env, r3);
1342    uint16_t pad = a2;
1343    uint32_t cc = 0;
1344
1345    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1346
1347    set_length(env, r1 + 1, src1len);
1348    set_length(env, r3 + 1, src3len);
1349    set_address(env, r1, src1);
1350    set_address(env, r3, src3);
1351
1352    return cc;
1353}
1354
1355/* checksum */
1356uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
1357                      uint64_t src, uint64_t src_len)
1358{
1359    uintptr_t ra = GETPC();
1360    uint64_t max_len, len;
1361    uint64_t cksm = (uint32_t)r1;
1362
1363    /* Lest we fail to service interrupts in a timely manner, limit the
1364       amount of work we're willing to do.  For now, let's cap at 8k.  */
1365    max_len = (src_len > 0x2000 ? 0x2000 : src_len);
1366
1367    /* Process full words as available.  */
1368    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
1369        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
1370    }
1371
1372    switch (max_len - len) {
1373    case 1:
1374        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
1375        len += 1;
1376        break;
1377    case 2:
1378        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1379        len += 2;
1380        break;
1381    case 3:
1382        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1383        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
1384        len += 3;
1385        break;
1386    }
1387
1388    /* Fold the carry from the checksum.  Note that we can see carry-out
1389       during folding more than once (but probably not more than twice).  */
1390    while (cksm > 0xffffffffull) {
1391        cksm = (uint32_t)cksm + (cksm >> 32);
1392    }
1393
1394    /* Indicate whether or not we've processed everything.  */
1395    env->cc_op = (len == src_len ? 0 : 3);
1396
1397    /* Return both cksm and processed length.  */
1398    env->retxl = cksm;
1399    return len;
1400}
1401
1402void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
1403{
1404    uintptr_t ra = GETPC();
1405    int len_dest = len >> 4;
1406    int len_src = len & 0xf;
1407    uint8_t b;
1408
1409    dest += len_dest;
1410    src += len_src;
1411
1412    /* last byte is special, it only flips the nibbles */
1413    b = cpu_ldub_data_ra(env, src, ra);
1414    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1415    src--;
1416    len_src--;
1417
1418    /* now pack every value */
1419    while (len_dest > 0) {
1420        b = 0;
1421
1422        if (len_src >= 0) {
1423            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1424            src--;
1425            len_src--;
1426        }
1427        if (len_src >= 0) {
1428            b |= cpu_ldub_data_ra(env, src, ra) << 4;
1429            src--;
1430            len_src--;
1431        }
1432
1433        len_dest--;
1434        dest--;
1435        cpu_stb_data_ra(env, dest, b, ra);
1436    }
1437}
1438
1439static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
1440                           uint32_t srclen, int ssize, uintptr_t ra)
1441{
1442    int i;
1443    /* The destination operand is always 16 bytes long.  */
1444    const int destlen = 16;
1445
1446    /* The operands are processed from right to left.  */
1447    src += srclen - 1;
1448    dest += destlen - 1;
1449
1450    for (i = 0; i < destlen; i++) {
1451        uint8_t b = 0;
1452
1453        /* Start with a positive sign */
1454        if (i == 0) {
1455            b = 0xc;
1456        } else if (srclen > ssize) {
1457            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1458            src -= ssize;
1459            srclen -= ssize;
1460        }
1461
1462        if (srclen > ssize) {
1463            b |= cpu_ldub_data_ra(env, src, ra) << 4;
1464            src -= ssize;
1465            srclen -= ssize;
1466        }
1467
1468        cpu_stb_data_ra(env, dest, b, ra);
1469        dest--;
1470    }
1471}
1472
1473
1474void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1475                 uint32_t srclen)
1476{
1477    do_pkau(env, dest, src, srclen, 1, GETPC());
1478}
1479
1480void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1481                 uint32_t srclen)
1482{
1483    do_pkau(env, dest, src, srclen, 2, GETPC());
1484}
1485
1486void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1487                  uint64_t src)
1488{
1489    uintptr_t ra = GETPC();
1490    int len_dest = len >> 4;
1491    int len_src = len & 0xf;
1492    uint8_t b;
1493    int second_nibble = 0;
1494
1495    dest += len_dest;
1496    src += len_src;
1497
1498    /* last byte is special, it only flips the nibbles */
1499    b = cpu_ldub_data_ra(env, src, ra);
1500    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1501    src--;
1502    len_src--;
1503
1504    /* now pad every nibble with 0xf0 */
1505
1506    while (len_dest > 0) {
1507        uint8_t cur_byte = 0;
1508
1509        if (len_src > 0) {
1510            cur_byte = cpu_ldub_data_ra(env, src, ra);
1511        }
1512
1513        len_dest--;
1514        dest--;
1515
1516        /* only advance one nibble at a time */
1517        if (second_nibble) {
1518            cur_byte >>= 4;
1519            len_src--;
1520            src--;
1521        }
1522        second_nibble = !second_nibble;
1523
1524        /* digit */
1525        cur_byte = (cur_byte & 0xf);
1526        /* zone bits */
1527        cur_byte |= 0xf0;
1528
1529        cpu_stb_data_ra(env, dest, cur_byte, ra);
1530    }
1531}
1532
1533static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1534                                 uint32_t destlen, int dsize, uint64_t src,
1535                                 uintptr_t ra)
1536{
1537    int i;
1538    uint32_t cc;
1539    uint8_t b;
1540    /* The source operand is always 16 bytes long.  */
1541    const int srclen = 16;
1542
1543    /* The operands are processed from right to left.  */
1544    src += srclen - 1;
1545    dest += destlen - dsize;
1546
1547    /* Check for the sign.  */
1548    b = cpu_ldub_data_ra(env, src, ra);
1549    src--;
1550    switch (b & 0xf) {
1551    case 0xa:
1552    case 0xc:
1553    case 0xe ... 0xf:
1554        cc = 0;  /* plus */
1555        break;
1556    case 0xb:
1557    case 0xd:
1558        cc = 1;  /* minus */
1559        break;
1560    default:
1561    case 0x0 ... 0x9:
1562        cc = 3;  /* invalid */
1563        break;
1564    }
1565
1566    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1567    for (i = 0; i < destlen; i += dsize) {
1568        if (i == (31 * dsize)) {
1569            /* If length is 32/64 bytes, the leftmost byte is 0. */
1570            b = 0;
1571        } else if (i % (2 * dsize)) {
1572            b = cpu_ldub_data_ra(env, src, ra);
1573            src--;
1574        } else {
1575            b >>= 4;
1576        }
1577        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1578        dest -= dsize;
1579    }
1580
1581    return cc;
1582}
1583
1584uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1585                       uint64_t src)
1586{
1587    return do_unpkau(env, dest, destlen, 1, src, GETPC());
1588}
1589
1590uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1591                       uint64_t src)
1592{
1593    return do_unpkau(env, dest, destlen, 2, src, GETPC());
1594}
1595
1596uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1597{
1598    uintptr_t ra = GETPC();
1599    uint32_t cc = 0;
1600    int i;
1601
1602    for (i = 0; i < destlen; i++) {
1603        uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1604        /* digit */
1605        cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1606
1607        if (i == (destlen - 1)) {
1608            /* sign */
1609            cc |= (b & 0xf) < 0xa ? 1 : 0;
1610        } else {
1611            /* digit */
1612            cc |= (b & 0xf) > 0x9 ? 2 : 0;
1613        }
1614    }
1615
1616    return cc;
1617}
1618
1619static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1620                             uint64_t trans, uintptr_t ra)
1621{
1622    uint32_t i;
1623
1624    for (i = 0; i <= len; i++) {
1625        uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1626        uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1627        cpu_stb_data_ra(env, array + i, new_byte, ra);
1628    }
1629
1630    return env->cc_op;
1631}
1632
1633void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1634                uint64_t trans)
1635{
1636    do_helper_tr(env, len, array, trans, GETPC());
1637}
1638
1639uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
1640                     uint64_t len, uint64_t trans)
1641{
1642    uintptr_t ra = GETPC();
1643    uint8_t end = env->regs[0] & 0xff;
1644    uint64_t l = len;
1645    uint64_t i;
1646    uint32_t cc = 0;
1647
1648    if (!(env->psw.mask & PSW_MASK_64)) {
1649        array &= 0x7fffffff;
1650        l = (uint32_t)l;
1651    }
1652
1653    /* Lest we fail to service interrupts in a timely manner, limit the
1654       amount of work we're willing to do.  For now, let's cap at 8k.  */
1655    if (l > 0x2000) {
1656        l = 0x2000;
1657        cc = 3;
1658    }
1659
1660    for (i = 0; i < l; i++) {
1661        uint8_t byte, new_byte;
1662
1663        byte = cpu_ldub_data_ra(env, array + i, ra);
1664
1665        if (byte == end) {
1666            cc = 1;
1667            break;
1668        }
1669
1670        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1671        cpu_stb_data_ra(env, array + i, new_byte, ra);
1672    }
1673
1674    env->cc_op = cc;
1675    env->retxl = len - i;
1676    return array + i;
1677}
1678
1679static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1680                                     uint64_t array, uint64_t trans,
1681                                     int inc, uintptr_t ra)
1682{
1683    int i;
1684
1685    for (i = 0; i <= len; i++) {
1686        uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1687        uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1688
1689        if (sbyte != 0) {
1690            set_address(env, 1, array + i * inc);
1691            env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1692            return (i == len) ? 2 : 1;
1693        }
1694    }
1695
1696    return 0;
1697}
1698
1699static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1700                                  uint64_t array, uint64_t trans,
1701                                  uintptr_t ra)
1702{
1703    return do_helper_trt(env, len, array, trans, 1, ra);
1704}
1705
1706uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1707                     uint64_t trans)
1708{
1709    return do_helper_trt(env, len, array, trans, 1, GETPC());
1710}
1711
1712static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1713                                   uint64_t array, uint64_t trans,
1714                                   uintptr_t ra)
1715{
1716    return do_helper_trt(env, len, array, trans, -1, ra);
1717}
1718
1719uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1720                      uint64_t trans)
1721{
1722    return do_helper_trt(env, len, array, trans, -1, GETPC());
1723}
1724
1725/* Translate one/two to one/two */
1726uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1727                      uint32_t tst, uint32_t sizes)
1728{
1729    uintptr_t ra = GETPC();
1730    int dsize = (sizes & 1) ? 1 : 2;
1731    int ssize = (sizes & 2) ? 1 : 2;
1732    uint64_t tbl = get_address(env, 1);
1733    uint64_t dst = get_address(env, r1);
1734    uint64_t len = get_length(env, r1 + 1);
1735    uint64_t src = get_address(env, r2);
1736    uint32_t cc = 3;
1737    int i;
1738
1739    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1740       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1741       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1742    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1743        tbl &= -4096;
1744    } else {
1745        tbl &= -8;
1746    }
1747
1748    check_alignment(env, len, ssize, ra);
1749
1750    /* Lest we fail to service interrupts in a timely manner, */
1751    /* limit the amount of work we're willing to do.   */
1752    for (i = 0; i < 0x2000; i++) {
1753        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1754        uint64_t tble = tbl + (sval * dsize);
1755        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1756        if (dval == tst) {
1757            cc = 1;
1758            break;
1759        }
1760        cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1761
1762        len -= ssize;
1763        src += ssize;
1764        dst += dsize;
1765
1766        if (len == 0) {
1767            cc = 0;
1768            break;
1769        }
1770    }
1771
1772    set_address(env, r1, dst);
1773    set_length(env, r1 + 1, len);
1774    set_address(env, r2, src);
1775
1776    return cc;
1777}
1778
1779void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
1780                  uint32_t r1, uint32_t r3)
1781{
1782    uintptr_t ra = GETPC();
1783    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1784    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1785    Int128 oldv;
1786    uint64_t oldh, oldl;
1787    bool fail;
1788
1789    check_alignment(env, addr, 16, ra);
1790
1791    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
1792    oldl = cpu_ldq_data_ra(env, addr + 8, ra);
1793
1794    oldv = int128_make128(oldl, oldh);
1795    fail = !int128_eq(oldv, cmpv);
1796    if (fail) {
1797        newv = oldv;
1798    }
1799
1800    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
1801    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
1802
1803    env->cc_op = fail;
1804    env->regs[r1] = int128_gethi(oldv);
1805    env->regs[r1 + 1] = int128_getlo(oldv);
1806}
1807
1808void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
1809                           uint32_t r1, uint32_t r3)
1810{
1811    uintptr_t ra = GETPC();
1812    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1813    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1814    int mem_idx;
1815    MemOpIdx oi;
1816    Int128 oldv;
1817    bool fail;
1818
1819    assert(HAVE_CMPXCHG128);
1820
1821    mem_idx = cpu_mmu_index(env, false);
1822    oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1823    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
1824    fail = !int128_eq(oldv, cmpv);
1825
1826    env->cc_op = fail;
1827    env->regs[r1] = int128_gethi(oldv);
1828    env->regs[r1 + 1] = int128_getlo(oldv);
1829}
1830
1831static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1832                        uint64_t a2, bool parallel)
1833{
1834    uint32_t mem_idx = cpu_mmu_index(env, false);
1835    uintptr_t ra = GETPC();
1836    uint32_t fc = extract32(env->regs[0], 0, 8);
1837    uint32_t sc = extract32(env->regs[0], 8, 8);
1838    uint64_t pl = get_address(env, 1) & -16;
1839    uint64_t svh, svl;
1840    uint32_t cc;
1841
1842    /* Sanity check the function code and storage characteristic.  */
1843    if (fc > 1 || sc > 3) {
1844        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1845            goto spec_exception;
1846        }
1847        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1848            goto spec_exception;
1849        }
1850    }
1851
1852    /* Sanity check the alignments.  */
1853    if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1854        goto spec_exception;
1855    }
1856
1857    /* Sanity check writability of the store address.  */
1858    probe_write(env, a2, 1 << sc, mem_idx, ra);
1859
1860    /*
1861     * Note that the compare-and-swap is atomic, and the store is atomic,
1862     * but the complete operation is not.  Therefore we do not need to
1863     * assert serial context in order to implement this.  That said,
1864     * restart early if we can't support either operation that is supposed
1865     * to be atomic.
1866     */
1867    if (parallel) {
1868        uint32_t max = 2;
1869#ifdef CONFIG_ATOMIC64
1870        max = 3;
1871#endif
1872        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1873            (HAVE_ATOMIC128  ? 0 : sc > max)) {
1874            cpu_loop_exit_atomic(env_cpu(env), ra);
1875        }
1876    }
1877
1878    /* All loads happen before all stores.  For simplicity, load the entire
1879       store value area from the parameter list.  */
1880    svh = cpu_ldq_data_ra(env, pl + 16, ra);
1881    svl = cpu_ldq_data_ra(env, pl + 24, ra);
1882
1883    switch (fc) {
1884    case 0:
1885        {
1886            uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1887            uint32_t cv = env->regs[r3];
1888            uint32_t ov;
1889
1890            if (parallel) {
1891#ifdef CONFIG_USER_ONLY
1892                uint32_t *haddr = g2h(env_cpu(env), a1);
1893                ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1894#else
1895                MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1896                ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1897#endif
1898            } else {
1899                ov = cpu_ldl_data_ra(env, a1, ra);
1900                cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1901            }
1902            cc = (ov != cv);
1903            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1904        }
1905        break;
1906
1907    case 1:
1908        {
1909            uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1910            uint64_t cv = env->regs[r3];
1911            uint64_t ov;
1912
1913            if (parallel) {
1914#ifdef CONFIG_ATOMIC64
1915                MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mem_idx);
1916                ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1917#else
1918                /* Note that we asserted !parallel above.  */
1919                g_assert_not_reached();
1920#endif
1921            } else {
1922                ov = cpu_ldq_data_ra(env, a1, ra);
1923                cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1924            }
1925            cc = (ov != cv);
1926            env->regs[r3] = ov;
1927        }
1928        break;
1929
1930    case 2:
1931        {
1932            uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1933            uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1934            Int128 nv = int128_make128(nvl, nvh);
1935            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1936            Int128 ov;
1937
1938            if (!parallel) {
1939                uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1940                uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1941
1942                ov = int128_make128(ol, oh);
1943                cc = !int128_eq(ov, cv);
1944                if (cc) {
1945                    nv = ov;
1946                }
1947
1948                cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1949                cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1950            } else if (HAVE_CMPXCHG128) {
1951                MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
1952                ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1953                cc = !int128_eq(ov, cv);
1954            } else {
1955                /* Note that we asserted !parallel above.  */
1956                g_assert_not_reached();
1957            }
1958
1959            env->regs[r3 + 0] = int128_gethi(ov);
1960            env->regs[r3 + 1] = int128_getlo(ov);
1961        }
1962        break;
1963
1964    default:
1965        g_assert_not_reached();
1966    }
1967
1968    /* Store only if the comparison succeeded.  Note that above we use a pair
1969       of 64-bit big-endian loads, so for sc < 3 we must extract the value
1970       from the most-significant bits of svh.  */
1971    if (cc == 0) {
1972        switch (sc) {
1973        case 0:
1974            cpu_stb_data_ra(env, a2, svh >> 56, ra);
1975            break;
1976        case 1:
1977            cpu_stw_data_ra(env, a2, svh >> 48, ra);
1978            break;
1979        case 2:
1980            cpu_stl_data_ra(env, a2, svh >> 32, ra);
1981            break;
1982        case 3:
1983            cpu_stq_data_ra(env, a2, svh, ra);
1984            break;
1985        case 4:
1986            if (!parallel) {
1987                cpu_stq_data_ra(env, a2 + 0, svh, ra);
1988                cpu_stq_data_ra(env, a2 + 8, svl, ra);
1989            } else if (HAVE_ATOMIC128) {
1990                MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
1991                Int128 sv = int128_make128(svl, svh);
1992                cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1993            } else {
1994                /* Note that we asserted !parallel above.  */
1995                g_assert_not_reached();
1996            }
1997            break;
1998        default:
1999            g_assert_not_reached();
2000        }
2001    }
2002
2003    return cc;
2004
2005 spec_exception:
2006    tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2007}
2008
2009uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
2010{
2011    return do_csst(env, r3, a1, a2, false);
2012}
2013
2014uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
2015                               uint64_t a2)
2016{
2017    return do_csst(env, r3, a1, a2, true);
2018}
2019
2020#if !defined(CONFIG_USER_ONLY)
2021void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2022{
2023    uintptr_t ra = GETPC();
2024    bool PERchanged = false;
2025    uint64_t src = a2;
2026    uint32_t i;
2027
2028    if (src & 0x7) {
2029        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2030    }
2031
2032    for (i = r1;; i = (i + 1) % 16) {
2033        uint64_t val = cpu_ldq_data_ra(env, src, ra);
2034        if (env->cregs[i] != val && i >= 9 && i <= 11) {
2035            PERchanged = true;
2036        }
2037        env->cregs[i] = val;
2038        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
2039                   i, src, val);
2040        src += sizeof(uint64_t);
2041
2042        if (i == r3) {
2043            break;
2044        }
2045    }
2046
2047    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2048        s390_cpu_recompute_watchpoints(env_cpu(env));
2049    }
2050
2051    tlb_flush(env_cpu(env));
2052}
2053
2054void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2055{
2056    uintptr_t ra = GETPC();
2057    bool PERchanged = false;
2058    uint64_t src = a2;
2059    uint32_t i;
2060
2061    if (src & 0x3) {
2062        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2063    }
2064
2065    for (i = r1;; i = (i + 1) % 16) {
2066        uint32_t val = cpu_ldl_data_ra(env, src, ra);
2067        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
2068            PERchanged = true;
2069        }
2070        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
2071        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
2072        src += sizeof(uint32_t);
2073
2074        if (i == r3) {
2075            break;
2076        }
2077    }
2078
2079    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2080        s390_cpu_recompute_watchpoints(env_cpu(env));
2081    }
2082
2083    tlb_flush(env_cpu(env));
2084}
2085
2086void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2087{
2088    uintptr_t ra = GETPC();
2089    uint64_t dest = a2;
2090    uint32_t i;
2091
2092    if (dest & 0x7) {
2093        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2094    }
2095
2096    for (i = r1;; i = (i + 1) % 16) {
2097        cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2098        dest += sizeof(uint64_t);
2099
2100        if (i == r3) {
2101            break;
2102        }
2103    }
2104}
2105
2106void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2107{
2108    uintptr_t ra = GETPC();
2109    uint64_t dest = a2;
2110    uint32_t i;
2111
2112    if (dest & 0x3) {
2113        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2114    }
2115
2116    for (i = r1;; i = (i + 1) % 16) {
2117        cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2118        dest += sizeof(uint32_t);
2119
2120        if (i == r3) {
2121            break;
2122        }
2123    }
2124}
2125
2126uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2127{
2128    uintptr_t ra = GETPC();
2129    int i;
2130
2131    real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2132
2133    for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2134        cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2135    }
2136
2137    return 0;
2138}
2139
/*
 * Test protection of the byte at a1.
 *
 * Returns the condition code:
 *   0  fetching permitted; storing permitted
 *   1  fetching permitted; storing not permitted
 *   2  fetching not permitted; storing not permitted
 *   3  translation not available
 * Addressing and translation-specification exceptions are forwarded to the
 * guest instead of being reported through the condition code.
 *
 * a2 is not used by this implementation.
 */
uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    /* The write check failed; env->int_pgm_code tells us why. */
    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        cs->exception_index = -1;
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    /* int_pgm_code is re-read here, after the read check above may have run. */
    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = -1;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = -1;
    return 3;
}
2179
2180/* insert storage key extended */
2181uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2182{
2183    static S390SKeysState *ss;
2184    static S390SKeysClass *skeyclass;
2185    uint64_t addr = wrap_address(env, r2);
2186    uint8_t key;
2187    int rc;
2188
2189    addr = mmu_real2abs(env, addr);
2190    if (!mmu_absolute_addr_valid(addr, false)) {
2191        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2192    }
2193
2194    if (unlikely(!ss)) {
2195        ss = s390_get_skeys_device();
2196        skeyclass = S390_SKEYS_GET_CLASS(ss);
2197        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2198            tlb_flush_all_cpus_synced(env_cpu(env));
2199        }
2200    }
2201
2202    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2203    if (rc) {
2204        trace_get_skeys_nonzero(rc);
2205        return 0;
2206    }
2207    return key;
2208}
2209
2210/* set storage key extended */
2211void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2212{
2213    static S390SKeysState *ss;
2214    static S390SKeysClass *skeyclass;
2215    uint64_t addr = wrap_address(env, r2);
2216    uint8_t key;
2217    int rc;
2218
2219    addr = mmu_real2abs(env, addr);
2220    if (!mmu_absolute_addr_valid(addr, false)) {
2221        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2222    }
2223
2224    if (unlikely(!ss)) {
2225        ss = s390_get_skeys_device();
2226        skeyclass = S390_SKEYS_GET_CLASS(ss);
2227        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2228            tlb_flush_all_cpus_synced(env_cpu(env));
2229        }
2230    }
2231
2232    key = r1 & 0xfe;
2233    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2234    if (rc) {
2235        trace_set_skeys_nonzero(rc);
2236    }
2237   /*
2238    * As we can only flush by virtual address and not all the entries
2239    * that point to a physical address we have to flush the whole TLB.
2240    */
2241    tlb_flush_all_cpus_synced(env_cpu(env));
2242}
2243
2244/* reset reference bit extended */
2245uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2246{
2247    uint64_t addr = wrap_address(env, r2);
2248    static S390SKeysState *ss;
2249    static S390SKeysClass *skeyclass;
2250    uint8_t re, key;
2251    int rc;
2252
2253    addr = mmu_real2abs(env, addr);
2254    if (!mmu_absolute_addr_valid(addr, false)) {
2255        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2256    }
2257
2258    if (unlikely(!ss)) {
2259        ss = s390_get_skeys_device();
2260        skeyclass = S390_SKEYS_GET_CLASS(ss);
2261        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2262            tlb_flush_all_cpus_synced(env_cpu(env));
2263        }
2264    }
2265
2266    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2267    if (rc) {
2268        trace_get_skeys_nonzero(rc);
2269        return 0;
2270    }
2271
2272    re = key & (SK_R | SK_C);
2273    key &= ~SK_R;
2274
2275    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2276    if (rc) {
2277        trace_set_skeys_nonzero(rc);
2278        return 0;
2279    }
2280   /*
2281    * As we can only flush by virtual address and not all the entries
2282    * that point to a physical address we have to flush the whole TLB.
2283    */
2284    tlb_flush_all_cpus_synced(env_cpu(env));
2285
2286    /*
2287     * cc
2288     *
2289     * 0  Reference bit zero; change bit zero
2290     * 1  Reference bit zero; change bit one
2291     * 2  Reference bit one; change bit zero
2292     * 3  Reference bit one; change bit one
2293     */
2294
2295    return re >> 1;
2296}
2297
2298uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2299{
2300    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2301    S390Access srca, desta;
2302    uintptr_t ra = GETPC();
2303    int cc = 0;
2304
2305    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2306               __func__, l, a1, a2);
2307
2308    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2309        psw_as == AS_HOME || psw_as == AS_ACCREG) {
2310        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2311    }
2312
2313    l = wrap_length32(env, l);
2314    if (l > 256) {
2315        /* max 256 */
2316        l = 256;
2317        cc = 3;
2318    } else if (!l) {
2319        return cc;
2320    }
2321
2322    /* TODO: Access key handling */
2323    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2324    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2325    access_memmove(env, &desta, &srca, ra);
2326    return cc;
2327}
2328
2329uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2330{
2331    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2332    S390Access srca, desta;
2333    uintptr_t ra = GETPC();
2334    int cc = 0;
2335
2336    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2337               __func__, l, a1, a2);
2338
2339    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2340        psw_as == AS_HOME || psw_as == AS_ACCREG) {
2341        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2342    }
2343
2344    l = wrap_length32(env, l);
2345    if (l > 256) {
2346        /* max 256 */
2347        l = 256;
2348        cc = 3;
2349    } else if (!l) {
2350        return cc;
2351    }
2352
2353    /* TODO: Access key handling */
2354    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2355    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2356    access_memmove(env, &desta, &srca, ra);
2357    return cc;
2358}
2359
2360void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2361{
2362    CPUState *cs = env_cpu(env);
2363    const uintptr_t ra = GETPC();
2364    uint64_t table, entry, raddr;
2365    uint16_t entries, i, index = 0;
2366
2367    if (r2 & 0xff000) {
2368        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2369    }
2370
2371    if (!(r2 & 0x800)) {
2372        /* invalidation-and-clearing operation */
2373        table = r1 & ASCE_ORIGIN;
2374        entries = (r2 & 0x7ff) + 1;
2375
2376        switch (r1 & ASCE_TYPE_MASK) {
2377        case ASCE_TYPE_REGION1:
2378            index = (r2 >> 53) & 0x7ff;
2379            break;
2380        case ASCE_TYPE_REGION2:
2381            index = (r2 >> 42) & 0x7ff;
2382            break;
2383        case ASCE_TYPE_REGION3:
2384            index = (r2 >> 31) & 0x7ff;
2385            break;
2386        case ASCE_TYPE_SEGMENT:
2387            index = (r2 >> 20) & 0x7ff;
2388            break;
2389        }
2390        for (i = 0; i < entries; i++) {
2391            /* addresses are not wrapped in 24/31bit mode but table index is */
2392            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2393            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2394            if (!(entry & REGION_ENTRY_I)) {
2395                /* we are allowed to not store if already invalid */
2396                entry |= REGION_ENTRY_I;
2397                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2398            }
2399        }
2400    }
2401
2402    /* We simply flush the complete tlb, therefore we can ignore r3. */
2403    if (m4 & 1) {
2404        tlb_flush(cs);
2405    } else {
2406        tlb_flush_all_cpus_synced(cs);
2407    }
2408}
2409
2410/* invalidate pte */
2411void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2412                  uint32_t m4)
2413{
2414    CPUState *cs = env_cpu(env);
2415    const uintptr_t ra = GETPC();
2416    uint64_t page = vaddr & TARGET_PAGE_MASK;
2417    uint64_t pte_addr, pte;
2418
2419    /* Compute the page table entry address */
2420    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2421    pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2422
2423    /* Mark the page table entry as invalid */
2424    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2425    pte |= PAGE_ENTRY_I;
2426    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2427
2428    /* XXX we exploit the fact that Linux passes the exact virtual
2429       address here - it's not obliged to! */
2430    if (m4 & 1) {
2431        if (vaddr & ~VADDR_PAGE_TX_MASK) {
2432            tlb_flush_page(cs, page);
2433            /* XXX 31-bit hack */
2434            tlb_flush_page(cs, page ^ 0x80000000);
2435        } else {
2436            /* looks like we don't have a valid virtual address */
2437            tlb_flush(cs);
2438        }
2439    } else {
2440        if (vaddr & ~VADDR_PAGE_TX_MASK) {
2441            tlb_flush_page_all_cpus_synced(cs, page);
2442            /* XXX 31-bit hack */
2443            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2444        } else {
2445            /* looks like we don't have a valid virtual address */
2446            tlb_flush_all_cpus_synced(cs);
2447        }
2448    }
2449}
2450
2451/* flush local tlb */
2452void HELPER(ptlb)(CPUS390XState *env)
2453{
2454    tlb_flush(env_cpu(env));
2455}
2456
2457/* flush global tlb */
2458void HELPER(purge)(CPUS390XState *env)
2459{
2460    tlb_flush_all_cpus_synced(env_cpu(env));
2461}
2462
2463/* load real address */
2464uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2465{
2466    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2467    uint64_t ret, tec;
2468    int flags, exc, cc;
2469
2470    /* XXX incomplete - has more corner cases */
2471    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2472        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2473    }
2474
2475    exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2476    if (exc) {
2477        cc = 3;
2478        ret = exc | 0x80000000;
2479    } else {
2480        cc = 0;
2481        ret |= addr & ~TARGET_PAGE_MASK;
2482    }
2483
2484    env->cc_op = cc;
2485    return ret;
2486}
2487#endif
2488
2489/* load pair from quadword */
2490uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2491{
2492    uintptr_t ra = GETPC();
2493    uint64_t hi, lo;
2494
2495    check_alignment(env, addr, 16, ra);
2496    hi = cpu_ldq_data_ra(env, addr + 0, ra);
2497    lo = cpu_ldq_data_ra(env, addr + 8, ra);
2498
2499    env->retxl = lo;
2500    return hi;
2501}
2502
2503uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2504{
2505    uintptr_t ra = GETPC();
2506    uint64_t hi, lo;
2507    int mem_idx;
2508    MemOpIdx oi;
2509    Int128 v;
2510
2511    assert(HAVE_ATOMIC128);
2512
2513    mem_idx = cpu_mmu_index(env, false);
2514    oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2515    v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2516    hi = int128_gethi(v);
2517    lo = int128_getlo(v);
2518
2519    env->retxl = lo;
2520    return hi;
2521}
2522
2523/* store pair to quadword */
2524void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2525                  uint64_t low, uint64_t high)
2526{
2527    uintptr_t ra = GETPC();
2528
2529    check_alignment(env, addr, 16, ra);
2530    cpu_stq_data_ra(env, addr + 0, high, ra);
2531    cpu_stq_data_ra(env, addr + 8, low, ra);
2532}
2533
2534void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2535                           uint64_t low, uint64_t high)
2536{
2537    uintptr_t ra = GETPC();
2538    int mem_idx;
2539    MemOpIdx oi;
2540    Int128 v;
2541
2542    assert(HAVE_ATOMIC128);
2543
2544    mem_idx = cpu_mmu_index(env, false);
2545    oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2546    v = int128_make128(low, high);
2547    cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2548}
2549
/*
 * Execute instruction.  This instruction executes an insn modified with
 * the contents of r1.  It does not change the executed instruction in memory;
 * it does not change the program counter.
 *
 * Perform this by recording the modified instruction in env->ex_value.
 * This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
 *
 * @ilen: length value recorded with the target insn; also used to advance
 *        the PSW address on the fast path and as the SVC ilen
 * @r1:   value whose low byte is ORed into the second byte of the target
 * @addr: address of the target instruction
 */
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    /* First halfword of the target; the opcode is its high byte. */
    uint64_t insn = cpu_lduw_code(env, addr);
    uint8_t opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  */
    /* The target insn is kept left-aligned in the 64-bit value. */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    if ((opc & 0xf0) == 0xd0) {
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        /* Indexed by the low opcode nibble; unlisted slots are NULL. */
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Decode length, base and displacement fields of the target. */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            /* A base register number of 0 contributes 0 to the address. */
            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);

            /* The helper is invoked directly, with 0 as its ra argument. */
            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* Opcode 0x0a: raise EXCP_SVC with the code byte from the insn. */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    env->ex_value = insn | ilen;
}
2622
/*
 * MOVE WITH OPTIONAL SPECIFICATIONS: copy up to 4096 bytes from src to
 * dest, using the address spaces and keys selected by the operand access
 * controls (OAC) in general register 0.
 *
 * Returns cc 3 if the wrapped length exceeded 4096 and was truncated, cc 0
 * otherwise.  Raises special-operation, privileged-operation or addressing
 * exceptions as checked below.  AR mode and problem state are not
 * implemented and currently end in an addressing exception.
 */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    if (!(env->psw.mask & PSW_MASK_DAT)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;
    dest_as = (val >> 6) & 0x3;
    dest_k = (val >> 1) & 0x1;
    dest_a = val & 0x1;

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    /* Without the K/A validity bits, fall back to the PSW key and ASC. */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    /* At most 4096 bytes are moved per execution; cc 3 reports truncation. */
    len = wrap_length32(env, len);
    if (len > 4096) {
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
    }

    /* FIXME: Access using correct keys and AR-mode */
    if (len) {
        S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
                                         mmu_idx_from_as(src_as), ra);
        S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
                                          mmu_idx_from_as(dest_as), ra);

        access_memmove(env, &desta, &srca, ra);
    }

    return cc;
}
2706
/*
 * Decode a Unicode character from at most ILEN bytes at ADDR.  A return
 * value < 0 indicates success, storing the UTF-32 result into OCHAR and the
 * input length into OLEN.  A return value >= 0 indicates failure, and the
 * CC value to be returned.  ENH_CHECK enables the additional validity
 * checks implemented by the individual decoders.
 */
typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, bool enh_check, uintptr_t ra,
                                 uint32_t *ochar, uint32_t *olen);

/*
 * Encode the Unicode character C into at most ILEN bytes at ADDR.  A return
 * value < 0 indicates success, storing the bytes into ADDR and the output
 * length into OLEN.  A return value >= 0 indicates failure, and the CC
 * value to be returned.
 */
typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, uintptr_t ra, uint32_t c,
                                 uint32_t *olen);
2720
2721static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2722                       bool enh_check, uintptr_t ra,
2723                       uint32_t *ochar, uint32_t *olen)
2724{
2725    uint8_t s0, s1, s2, s3;
2726    uint32_t c, l;
2727
2728    if (ilen < 1) {
2729        return 0;
2730    }
2731    s0 = cpu_ldub_data_ra(env, addr, ra);
2732    if (s0 <= 0x7f) {
2733        /* one byte character */
2734        l = 1;
2735        c = s0;
2736    } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2737        /* invalid character */
2738        return 2;
2739    } else if (s0 <= 0xdf) {
2740        /* two byte character */
2741        l = 2;
2742        if (ilen < 2) {
2743            return 0;
2744        }
2745        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2746        c = s0 & 0x1f;
2747        c = (c << 6) | (s1 & 0x3f);
2748        if (enh_check && (s1 & 0xc0) != 0x80) {
2749            return 2;
2750        }
2751    } else if (s0 <= 0xef) {
2752        /* three byte character */
2753        l = 3;
2754        if (ilen < 3) {
2755            return 0;
2756        }
2757        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2758        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2759        c = s0 & 0x0f;
2760        c = (c << 6) | (s1 & 0x3f);
2761        c = (c << 6) | (s2 & 0x3f);
2762        /* Fold the byte-by-byte range descriptions in the PoO into
2763           tests against the complete value.  It disallows encodings
2764           that could be smaller, and the UTF-16 surrogates.  */
2765        if (enh_check
2766            && ((s1 & 0xc0) != 0x80
2767                || (s2 & 0xc0) != 0x80
2768                || c < 0x1000
2769                || (c >= 0xd800 && c <= 0xdfff))) {
2770            return 2;
2771        }
2772    } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2773        /* four byte character */
2774        l = 4;
2775        if (ilen < 4) {
2776            return 0;
2777        }
2778        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2779        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2780        s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2781        c = s0 & 0x07;
2782        c = (c << 6) | (s1 & 0x3f);
2783        c = (c << 6) | (s2 & 0x3f);
2784        c = (c << 6) | (s3 & 0x3f);
2785        /* See above.  */
2786        if (enh_check
2787            && ((s1 & 0xc0) != 0x80
2788                || (s2 & 0xc0) != 0x80
2789                || (s3 & 0xc0) != 0x80
2790                || c < 0x010000
2791                || c > 0x10ffff)) {
2792            return 2;
2793        }
2794    } else {
2795        /* invalid character */
2796        return 2;
2797    }
2798
2799    *ochar = c;
2800    *olen = l;
2801    return -1;
2802}
2803
2804static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2805                        bool enh_check, uintptr_t ra,
2806                        uint32_t *ochar, uint32_t *olen)
2807{
2808    uint16_t s0, s1;
2809    uint32_t c, l;
2810
2811    if (ilen < 2) {
2812        return 0;
2813    }
2814    s0 = cpu_lduw_data_ra(env, addr, ra);
2815    if ((s0 & 0xfc00) != 0xd800) {
2816        /* one word character */
2817        l = 2;
2818        c = s0;
2819    } else {
2820        /* two word character */
2821        l = 4;
2822        if (ilen < 4) {
2823            return 0;
2824        }
2825        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2826        c = extract32(s0, 6, 4) + 1;
2827        c = (c << 6) | (s0 & 0x3f);
2828        c = (c << 10) | (s1 & 0x3ff);
2829        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2830            /* invalid surrogate character */
2831            return 2;
2832        }
2833    }
2834
2835    *ochar = c;
2836    *olen = l;
2837    return -1;
2838}
2839
2840static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2841                        bool enh_check, uintptr_t ra,
2842                        uint32_t *ochar, uint32_t *olen)
2843{
2844    uint32_t c;
2845
2846    if (ilen < 4) {
2847        return 0;
2848    }
2849    c = cpu_ldl_data_ra(env, addr, ra);
2850    if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2851        /* invalid unicode character */
2852        return 2;
2853    }
2854
2855    *ochar = c;
2856    *olen = 4;
2857    return -1;
2858}
2859
2860static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2861                       uintptr_t ra, uint32_t c, uint32_t *olen)
2862{
2863    uint8_t d[4];
2864    uint32_t l, i;
2865
2866    if (c <= 0x7f) {
2867        /* one byte character */
2868        l = 1;
2869        d[0] = c;
2870    } else if (c <= 0x7ff) {
2871        /* two byte character */
2872        l = 2;
2873        d[1] = 0x80 | extract32(c, 0, 6);
2874        d[0] = 0xc0 | extract32(c, 6, 5);
2875    } else if (c <= 0xffff) {
2876        /* three byte character */
2877        l = 3;
2878        d[2] = 0x80 | extract32(c, 0, 6);
2879        d[1] = 0x80 | extract32(c, 6, 6);
2880        d[0] = 0xe0 | extract32(c, 12, 4);
2881    } else {
2882        /* four byte character */
2883        l = 4;
2884        d[3] = 0x80 | extract32(c, 0, 6);
2885        d[2] = 0x80 | extract32(c, 6, 6);
2886        d[1] = 0x80 | extract32(c, 12, 6);
2887        d[0] = 0xf0 | extract32(c, 18, 3);
2888    }
2889
2890    if (ilen < l) {
2891        return 1;
2892    }
2893    for (i = 0; i < l; ++i) {
2894        cpu_stb_data_ra(env, addr + i, d[i], ra);
2895    }
2896
2897    *olen = l;
2898    return -1;
2899}
2900
2901static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2902                        uintptr_t ra, uint32_t c, uint32_t *olen)
2903{
2904    uint16_t d0, d1;
2905
2906    if (c <= 0xffff) {
2907        /* one word character */
2908        if (ilen < 2) {
2909            return 1;
2910        }
2911        cpu_stw_data_ra(env, addr, c, ra);
2912        *olen = 2;
2913    } else {
2914        /* two word character */
2915        if (ilen < 4) {
2916            return 1;
2917        }
2918        d1 = 0xdc00 | extract32(c, 0, 10);
2919        d0 = 0xd800 | extract32(c, 10, 6);
2920        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2921        cpu_stw_data_ra(env, addr + 0, d0, ra);
2922        cpu_stw_data_ra(env, addr + 2, d1, ra);
2923        *olen = 4;
2924    }
2925
2926    return -1;
2927}
2928
2929static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2930                        uintptr_t ra, uint32_t c, uint32_t *olen)
2931{
2932    if (ilen < 4) {
2933        return 1;
2934    }
2935    cpu_stl_data_ra(env, addr, c, ra);
2936    *olen = 4;
2937    return -1;
2938}
2939
2940static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2941                                       uint32_t r2, uint32_t m3, uintptr_t ra,
2942                                       decode_unicode_fn decode,
2943                                       encode_unicode_fn encode)
2944{
2945    uint64_t dst = get_address(env, r1);
2946    uint64_t dlen = get_length(env, r1 + 1);
2947    uint64_t src = get_address(env, r2);
2948    uint64_t slen = get_length(env, r2 + 1);
2949    bool enh_check = m3 & 1;
2950    int cc, i;
2951
2952    /* Lest we fail to service interrupts in a timely manner, limit the
2953       amount of work we're willing to do.  For now, let's cap at 256.  */
2954    for (i = 0; i < 256; ++i) {
2955        uint32_t c, ilen, olen;
2956
2957        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2958        if (unlikely(cc >= 0)) {
2959            break;
2960        }
2961        cc = encode(env, dst, dlen, ra, c, &olen);
2962        if (unlikely(cc >= 0)) {
2963            break;
2964        }
2965
2966        src += ilen;
2967        slen -= ilen;
2968        dst += olen;
2969        dlen -= olen;
2970        cc = 3;
2971    }
2972
2973    set_address(env, r1, dst);
2974    set_length(env, r1 + 1, dlen);
2975    set_address(env, r2, src);
2976    set_length(env, r2 + 1, slen);
2977
2978    return cc;
2979}
2980
2981uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2982{
2983    return convert_unicode(env, r1, r2, m3, GETPC(),
2984                           decode_utf8, encode_utf16);
2985}
2986
2987uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2988{
2989    return convert_unicode(env, r1, r2, m3, GETPC(),
2990                           decode_utf8, encode_utf32);
2991}
2992
2993uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2994{
2995    return convert_unicode(env, r1, r2, m3, GETPC(),
2996                           decode_utf16, encode_utf8);
2997}
2998
2999uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
3000{
3001    return convert_unicode(env, r1, r2, m3, GETPC(),
3002                           decode_utf16, encode_utf32);
3003}
3004
3005uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
3006{
3007    return convert_unicode(env, r1, r2, m3, GETPC(),
3008                           decode_utf32, encode_utf8);
3009}
3010
3011uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
3012{
3013    return convert_unicode(env, r1, r2, m3, GETPC(),
3014                           decode_utf32, encode_utf16);
3015}
3016
3017void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
3018                        uintptr_t ra)
3019{
3020    /* test the actual access, not just any access to the page due to LAP */
3021    while (len) {
3022        const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
3023        const uint64_t curlen = MIN(pagelen, len);
3024
3025        probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
3026        addr = wrap_address(env, addr + curlen);
3027        len -= curlen;
3028    }
3029}
3030
3031void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
3032{
3033    probe_write_access(env, addr, len, GETPC());
3034}
3035