qemu/target/s390x/tcg/mem_helper.c
/*
 *  S/390 memory access helper routines
 *
 *  Copyright (c) 2009 Ulrich Hecht
 *  Copyright (c) 2009 Alexander Graf
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "s390x-internal.h"
#include "tcg_s390x.h"
#include "exec/helper-proto.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "tcg/tcg.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/s390x/storage-keys.h"
#include "hw/boards.h"
#endif

/*****************************************************************************/
/* Softmmu support */

/* #define DEBUG_HELPER */
#ifdef DEBUG_HELPER
#define HELPER_LOG(x...) qemu_log(x)
#else
#define HELPER_LOG(x...)
#endif

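/*
 * Check whether the given PSW key is permitted: in problem state, the key
 * must be enabled in the PSW-key mask (PKM) in control register 3; in
 * supervisor state, every key is valid.
 */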
static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
{
    uint16_t pkm = env->cregs[3] >> 16;

    if (env->psw.mask & PSW_MASK_PSTATE) {
        /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
        return pkm & (0x80 >> psw_key);
    }
    return true;
}

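/*
 * A destructive overlap is one where the destination starts within the
 * source range, so that source bytes would be overwritten before they are
 * read when copying left to right.
 */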
static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
                                   uint64_t src, uint32_t len)
{
    if (!len || src == dest) {
        return false;
    }
    /* Take care of wrapping at the end of address space. */
    if (unlikely(wrap_address(env, src + len - 1) < src)) {
        return dest > src || dest <= wrap_address(env, src + len - 1);
    }
    return dest > src && dest <= src + len - 1;
}

/* Trigger a SPECIFICATION exception if an address or a length is not
   naturally aligned.  */
static inline void check_alignment(CPUS390XState *env, uint64_t v,
                                   int wordsize, uintptr_t ra)
{
    if (v % wordsize) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }
}

/* Load a value from memory according to its size.  */
static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
                                           int wordsize, uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        return cpu_ldub_data_ra(env, addr, ra);
    case 2:
        return cpu_lduw_data_ra(env, addr, ra);
    default:
        abort();
    }
}

/* Store a value to memory according to its size.  */
static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
                                      uint64_t value, int wordsize,
                                      uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        cpu_stb_data_ra(env, addr, value, ra);
        break;
    case 2:
        cpu_stw_data_ra(env, addr, value, ra);
        break;
    default:
        abort();
    }
}

/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;
    target_ulong vaddr2;
    char *haddr1;
    char *haddr2;
    uint16_t size1;
    uint16_t size2;
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers. Maybe one day we can get rid of ld/st access - once we can
     * handle TLB_NOTDIRTY differently. We don't expect these special accesses
     * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
     * pages, we might trigger a new MMU translation; it is very unlikely
     * that the mapping changes in between and we would trigger a fault.
     */
    int mmu_idx;
} S390Access;

/*
 * With nonfault=1, return the PGM_ exception that would have been injected
 * into the guest; return 0 if no exception was detected.
 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 */
static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
                             MMUAccessType access_type, int mmu_idx,
                             bool nonfault, void **phost, uintptr_t ra)
{
    int flags;

#if defined(CONFIG_USER_ONLY)
    flags = page_get_flags(addr);
    if (!(flags & (access_type == MMU_DATA_LOAD ? PAGE_READ : PAGE_WRITE_ORG))) {
        env->__excp_addr = addr;
        flags = (flags & PAGE_VALID) ? PGM_PROTECTION : PGM_ADDRESSING;
        if (nonfault) {
            return flags;
        }
        tcg_s390_program_interrupt(env, flags, ra);
    }
    *phost = g2h(env_cpu(env), addr);
#else
    /*
     * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
     * to detect if there was an exception during tlb_fill().
     */
    env->tlb_fill_exc = 0;
    flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
                               ra);
    if (env->tlb_fill_exc) {
        return env->tlb_fill_exc;
    }

    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif
    return 0;
}

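/*
 * Probe a memory range that may cross at most one page boundary. size1 is
 * the distance to the end of the first page (-(vaddr1 | TARGET_PAGE_MASK));
 * anything beyond that is probed on the second page and described by
 * vaddr2/haddr2/size2 in the S390Access.
 */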
static int access_prepare_nf(S390Access *access, CPUS390XState *env,
                             bool nonfault, vaddr vaddr1, int size,
                             MMUAccessType access_type,
                             int mmu_idx, uintptr_t ra)
{
    void *haddr1, *haddr2 = NULL;
    int size1, size2, exc;
    vaddr vaddr2 = 0;

    assert(size > 0 && size <= 4096);

    size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK));
    size2 = size - size1;

    exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
                            &haddr1, ra);
    if (exc) {
        return exc;
    }
    if (unlikely(size2)) {
        /* The access crosses page boundaries. */
        vaddr2 = wrap_address(env, vaddr1 + size1);
        exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
                                nonfault, &haddr2, ra);
        if (exc) {
            return exc;
        }
    }

    *access = (S390Access) {
        .vaddr1 = vaddr1,
        .vaddr2 = vaddr2,
        .haddr1 = haddr1,
        .haddr2 = haddr2,
        .size1 = size1,
        .size2 = size2,
        .mmu_idx = mmu_idx
    };
    return 0;
}

static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
                                 MMUAccessType access_type, int mmu_idx,
                                 uintptr_t ra)
{
    S390Access ret;
    int exc = access_prepare_nf(&ret, env, false, vaddr, size,
                                access_type, mmu_idx, ra);
    assert(!exc);
    return ret;
}

/* Helper to handle memset on a single page. */
static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
                             uint8_t byte, uint16_t size, int mmu_idx,
                             uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    g_assert(haddr);
    memset(haddr, byte, size);
#else
    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    int i;

    if (likely(haddr)) {
        memset(haddr, byte, size);
    } else {
        /*
         * Do a single access and test if we can then get access to the
         * page. This is especially relevant to speed up TLB_NOTDIRTY.
         */
        g_assert(size > 0);
        helper_ret_stb_mmu(env, vaddr, byte, oi, ra);
        haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
        if (likely(haddr)) {
            memset(haddr + 1, byte, size - 1);
        } else {
            for (i = 1; i < size; i++) {
                helper_ret_stb_mmu(env, vaddr + i, byte, oi, ra);
            }
        }
    }
#endif
}

static void access_memset(CPUS390XState *env, S390Access *desta,
                          uint8_t byte, uintptr_t ra)
{
    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
                     desta->mmu_idx, ra);
    if (likely(!desta->size2)) {
        return;
    }
    do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
                     desta->mmu_idx, ra);
}

static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                                  int offset, int mmu_idx, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return ldub_p(*haddr + offset);
#else
    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    uint8_t byte;

    if (likely(*haddr)) {
        return ldub_p(*haddr + offset);
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    byte = helper_ret_ldub_mmu(env, vaddr + offset, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
    return byte;
#endif
}

static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
                               int offset, uintptr_t ra)
{
    if (offset < access->size1) {
        return do_access_get_byte(env, access->vaddr1, &access->haddr1,
                                  offset, access->mmu_idx, ra);
    }
    return do_access_get_byte(env, access->vaddr2, &access->haddr2,
                              offset - access->size1, access->mmu_idx, ra);
}

static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                               int offset, uint8_t byte, int mmu_idx,
                               uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    stb_p(*haddr + offset, byte);
#else
    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

    if (likely(*haddr)) {
        stb_p(*haddr + offset, byte);
        return;
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    helper_ret_stb_mmu(env, vaddr + offset, byte, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
#endif
}

static void access_set_byte(CPUS390XState *env, S390Access *access,
                            int offset, uint8_t byte, uintptr_t ra)
{
    if (offset < access->size1) {
        do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
                           access->mmu_idx, ra);
    } else {
        do_access_set_byte(env, access->vaddr2, &access->haddr2,
                           offset - access->size1, byte, access->mmu_idx, ra);
    }
}

/*
 * Move data with the same semantics as memmove(), provided the ranges don't
 * overlap or src > dest. Behavior is undefined on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int diff;

    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);

    /* Fall back to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        for (i = 0; i < desta->size1 + desta->size2; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

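    /*
     * Both operands are directly accessible. Depending on where the source
     * and destination split across the page boundary, the copy decomposes
     * into up to three memmove() calls.
     */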
    if (srca->size1 == desta->size1) {
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (srca->size1 < desta->size1) {
        diff = desta->size1 - srca->size1;
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        diff = srca->size1 - desta->size1;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}

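/* Map an address-space indication (AS_*) to the corresponding MMU index. */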
static int mmu_idx_from_as(uint8_t as)
{
    switch (as) {
    case AS_PRIMARY:
        return MMU_PRIMARY_IDX;
    case AS_SECONDARY:
        return MMU_SECONDARY_IDX;
    case AS_HOME:
        return MMU_HOME_IDX;
    default:
        /* FIXME AS_ACCREG */
        g_assert_not_reached();
    }
}

/* and on array */
static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* NC always processes one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) &
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}

/* xor on array */
static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* XC always processes one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /* xor with itself is the same as memset(0) */
    if (src == dest) {
        access_memset(env, &desta, 0, ra);
        return 0;
    }

    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}

/* or on array */
static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* OC always processes one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) |
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}

/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
    }

    return env->cc_op;
}

void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}

/* move inverse  */
void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVCIN always copies one more byte than specified - maximum is 256 */
    l++;

    src = wrap_address(env, src - l + 1);
    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* move numerics  */
void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVN always copies one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
                          (access_get_byte(env, &srca2, i, ra) & 0xf0);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}

/* move zones  */
void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVZ always copies one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
                          (access_get_byte(env, &srca2, i, ra) & 0x0f);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* compare unsigned byte arrays */
static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
                              uint64_t s2, uintptr_t ra)
{
    uint32_t i;
    uint32_t cc = 0;

    HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
               __func__, l, s1, s2);

    for (i = 0; i <= l; i++) {
        uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
        uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
        HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
        if (x < y) {
            cc = 1;
            break;
        } else if (x > y) {
            cc = 2;
            break;
        }
    }

    HELPER_LOG("\n");
    return cc;
}

uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}

/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            addr++;
        }
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}

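/* Read an address from a register, wrapped for the current addressing mode. */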
static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}

/*
 * Store the address to the given register, zeroing out unused leftmost
 * bits in bit positions 32-63 (24-bit and 31-bit mode only).
 */
static inline void set_address_zero(CPUS390XState *env, int reg,
                                    uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            address &= 0x00ffffff;
        } else {
            address &= 0x7fffffff;
        }
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
    }
}

static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}

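/* Truncate a length to 32 bits (wrap_length32) or 31 bits (wrap_length31)
   when not in 64-bit addressing mode. */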
static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        return (uint32_t)length;
    }
    return length;
}

static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        /* 24-Bit and 31-Bit mode */
        length &= 0x7fffffff;
    }
    return length;
}

static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}

static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = length;
    } else {
        /* 24-Bit and 31-Bit mode */
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
    }
}

/* search string (c is the byte to search for, r2 the string, r1 its end) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];

    /* Bits 32-55 must contain all 0.  */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

/* unsigned string compare (c is string terminator) */
uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                env->retxl = s2;
                return s1;
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            env->retxl = s2 + len;
            return s1 + len;
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    env->retxl = s2 + len;
    return s1 + len;
}

/* move page */
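/*
 * r0 carries the control bits: f/s select key functions (not handled here,
 * see the TODO on access keys below) and cco requests that a condition code
 * be returned instead of taking an exception when a page is inaccessible.
 */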
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
{
    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
    const int mmu_idx = cpu_mmu_index(env, false);
    const bool f = extract64(r0, 11, 1);
    const bool s = extract64(r0, 10, 1);
    const bool cco = extract64(r0, 8, 1);
    uintptr_t ra = GETPC();
    S390Access srca, desta;
    int exc;

    if ((f && s) || extract64(r0, 12, 4)) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    /*
     * We always manually handle exceptions such that we can properly store
     * r1/r2 to the lowcore on page-translation exceptions.
     *
     * TODO: Access key handling
     */
    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
                            MMU_DATA_LOAD, mmu_idx, ra);
    if (exc) {
        if (cco) {
            return 2;
        }
        goto inject_exc;
    }
    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
                            MMU_DATA_STORE, mmu_idx, ra);
    if (exc) {
        if (cco && exc != PGM_PROTECTION) {
            return 1;
        }
        goto inject_exc;
    }
    access_memmove(env, &desta, &srca, ra);
    return 0; /* data moved */
inject_exc:
#if !defined(CONFIG_USER_ONLY)
    if (exc != PGM_ADDRESSING) {
        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
                 env->tlb_fill_tec);
    }
    if (exc == PGM_PAGE_TRANS) {
        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
                 r1 << 4 | r2);
    }
#endif
    tcg_s390_program_interrupt(env, exc, ra);
}

/* string copy */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    const uint8_t c = env->regs[0];
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}

/* load access registers r1 to r3 from memory at a2 */
void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* store access registers r1 to r3 in memory at a2 */
void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        cpu_stl_data_ra(env, a2, env->aregs[i], ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* move long helper */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        *dest = wrap_address(env, *dest + len);
    }

    return *destlen ? 3 : cc;
}

/* move long */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memset(env, &desta, pad, ra);
        } else {
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
                                  ra);
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}

/* move long extended */
uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* move long unicode */
uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* compare logical long helper */
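/*
 * The shorter operand is logically extended with the padding character.
 * "limit" caps the bytes compared per invocation so that interrupts can be
 * serviced; CC 3 indicates a CPU-determined amount was processed.
 */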
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}

/* compare logical long */
uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
    uint64_t src3 = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);

    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
    set_address(env, r1, src1);
    set_address(env, r2, src3);

    return cc;
}

/* compare logical long extended memcompare insn with padding */
uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* compare logical long unicode memcompare insn with padding */
uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc = 0;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* checksum */
uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                      uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    env->retxl = cksm;
    return len;
}

void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}

static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}

void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 1, GETPC());
}

void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 2, GETPC());
}

void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    int second_nibble = 0;

    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */
    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}

static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}

uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 1, src, GETPC());
}

uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 2, src, GETPC());
}

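/*
 * TP: CC bit 1 flags an invalid sign nibble, CC bit 2 an invalid digit
 * (0 = all valid, 3 = both kinds of error seen).
 */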
uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;
    int i;

    for (i = 0; i < destlen; i++) {
        uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
        /* digit */
        cc |= (b & 0xf0) > 0x90 ? 2 : 0;

        if (i == (destlen - 1)) {
            /* sign */
            cc |= (b & 0xf) < 0xa ? 1 : 0;
        } else {
            /* digit */
            cc |= (b & 0xf) > 0x9 ? 2 : 0;
        }
    }

    return cc;
}

static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
                             uint64_t trans, uintptr_t ra)
{
    uint32_t i;

    for (i = 0; i <= len; i++) {
        uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
        uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    return env->cc_op;
}

void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
                uint64_t trans)
{
    do_helper_tr(env, len, array, trans, GETPC());
}

uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
                     uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    env->retxl = len - i;
    return array + i;
}

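/*
 * TRT/TRTR core: scan the first operand and look each byte up in the
 * function-code table. A nonzero function byte ends the scan: its address
 * goes to GR1 and its value to GR2, with CC 2 if it was the last byte
 * tested and CC 1 otherwise; CC 0 if all function bytes were zero.
 */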
1670static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1671                                     uint64_t array, uint64_t trans,
1672                                     int inc, uintptr_t ra)
1673{
1674    int i;
1675
1676    for (i = 0; i <= len; i++) {
1677        uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1678        uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1679
1680        if (sbyte != 0) {
1681            set_address(env, 1, array + i * inc);
1682            env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1683            return (i == len) ? 2 : 1;
1684        }
1685    }
1686
1687    return 0;
1688}
1689
1690static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1691                                  uint64_t array, uint64_t trans,
1692                                  uintptr_t ra)
1693{
1694    return do_helper_trt(env, len, array, trans, 1, ra);
1695}
1696
1697uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1698                     uint64_t trans)
1699{
1700    return do_helper_trt(env, len, array, trans, 1, GETPC());
1701}
1702
1703static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1704                                   uint64_t array, uint64_t trans,
1705                                   uintptr_t ra)
1706{
1707    return do_helper_trt(env, len, array, trans, -1, ra);
1708}
1709
1710uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1711                      uint64_t trans)
1712{
1713    return do_helper_trt(env, len, array, trans, -1, GETPC());
1714}
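
/*
 * Illustrative sketch of the TRT scan above on host memory: stop at the
 * first byte whose function byte in TABLE is nonzero.  With a table that
 * is nonzero only at ',' and '\0', this finds the next delimiter in a
 * string.  Hypothetical helper; the real helper additionally latches the
 * address into R1 and the function byte into R2, and INC selects the
 * forward (TRT) or backward (TRTR) direction.
 */
static inline int example_trt_scan(const uint8_t *array, int len,
                                   const uint8_t table[256],
                                   uint8_t *fn_byte)
{
    int i;

    for (i = 0; i <= len; i++) {        /* len is L-1, hence <= */
        uint8_t fb = table[array[i]];

        if (fb != 0) {
            *fn_byte = fb;
            return (i == len) ? 2 : 1;  /* cc as in do_helper_trt() */
        }
    }
    return 0;                           /* no function byte found */
}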
1715
1716/* Translate one/two to one/two */
1717uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1718                      uint32_t tst, uint32_t sizes)
1719{
1720    uintptr_t ra = GETPC();
1721    int dsize = (sizes & 1) ? 1 : 2;
1722    int ssize = (sizes & 2) ? 1 : 2;
1723    uint64_t tbl = get_address(env, 1);
1724    uint64_t dst = get_address(env, r1);
1725    uint64_t len = get_length(env, r1 + 1);
1726    uint64_t src = get_address(env, r2);
1727    uint32_t cc = 3;
1728    int i;
1729
1730    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1731       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1732       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1733    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1734        tbl &= -4096;
1735    } else {
1736        tbl &= -8;
1737    }
1738
1739    check_alignment(env, len, ssize, ra);
1740
1741    /* Lest we fail to service interrupts in a timely manner,
1742       limit the amount of work we're willing to do.  */
1743    for (i = 0; i < 0x2000; i++) {
1744        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1745        uint64_t tble = tbl + (sval * dsize);
1746        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1747        if (dval == tst) {
1748            cc = 1;
1749            break;
1750        }
1751        cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1752
1753        len -= ssize;
1754        src += ssize;
1755        dst += dsize;
1756
1757        if (len == 0) {
1758            cc = 0;
1759            break;
1760        }
1761    }
1762
1763    set_address(env, r1, dst);
1764    set_length(env, r1 + 1, len);
1765    set_address(env, r2, src);
1766
1767    return cc;
1768}
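
/*
 * The table-origin masking above, spelled out on host values: -4096 and
 * -8 are the two's-complement alignment masks for a 4K and a doubleword
 * boundary.  Hypothetical helper, for documentation only.
 */
static inline uint64_t example_trXX_table_origin(uint64_t tbl, int ssize,
                                                 bool etf2_enh)
{
    /* e.g. 0x10f45 -> 0x10000 (4K) or 0x10f40 (doubleword) */
    return (ssize == 2 && !etf2_enh) ? (tbl & -4096) : (tbl & -8);
}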
1769
1770void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
1771                  uint32_t r1, uint32_t r3)
1772{
1773    uintptr_t ra = GETPC();
1774    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1775    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1776    Int128 oldv;
1777    uint64_t oldh, oldl;
1778    bool fail;
1779
1780    check_alignment(env, addr, 16, ra);
1781
1782    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
1783    oldl = cpu_ldq_data_ra(env, addr + 8, ra);
1784
1785    oldv = int128_make128(oldl, oldh);
1786    fail = !int128_eq(oldv, cmpv);
1787    if (fail) {
1788        newv = oldv;
1789    }
1790
1791    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
1792    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
1793
1794    env->cc_op = fail;
1795    env->regs[r1] = int128_gethi(oldv);
1796    env->regs[r1 + 1] = int128_getlo(oldv);
1797}
1798
1799void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
1800                           uint32_t r1, uint32_t r3)
1801{
1802    uintptr_t ra = GETPC();
1803    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1804    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1805    int mem_idx;
1806    TCGMemOpIdx oi;
1807    Int128 oldv;
1808    bool fail;
1809
1810    assert(HAVE_CMPXCHG128);
1811
1812    mem_idx = cpu_mmu_index(env, false);
1813    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1814    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
1815    fail = !int128_eq(oldv, cmpv);
1816
1817    env->cc_op = fail;
1818    env->regs[r1] = int128_gethi(oldv);
1819    env->regs[r1 + 1] = int128_getlo(oldv);
1820}
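
/*
 * The compare-double-and-swap rule above, reduced to host values: a
 * minimal sketch of the CDSG success/failure logic using the Int128
 * helpers from "qemu/int128.h".  The serial helper writes the old value
 * back unchanged on mismatch, which leaves memory the same as not
 * storing at all.  Hypothetical function.
 */
static inline bool example_cdsg_rule(Int128 *mem, Int128 cmpv, Int128 newv,
                                     Int128 *oldv)
{
    bool fail = !int128_eq(*mem, cmpv);

    *oldv = *mem;       /* R1/R1+1 always receive the old value */
    if (!fail) {
        *mem = newv;    /* store only on successful comparison */
    }
    return fail;        /* becomes cc: 0 equal, 1 unequal */
}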
1821
1822static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1823                        uint64_t a2, bool parallel)
1824{
1825    uint32_t mem_idx = cpu_mmu_index(env, false);
1826    uintptr_t ra = GETPC();
1827    uint32_t fc = extract32(env->regs[0], 0, 8);
1828    uint32_t sc = extract32(env->regs[0], 8, 8);
1829    uint64_t pl = get_address(env, 1) & -16;
1830    uint64_t svh, svl;
1831    uint32_t cc;
1832
1833    /* Sanity check the function code and storage characteristic.  */
1834    if (fc > 1 || sc > 3) {
1835        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1836            goto spec_exception;
1837        }
1838        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1839            goto spec_exception;
1840        }
1841    }
1842
1843    /* Sanity check the alignments.  */
1844    if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1845        goto spec_exception;
1846    }
1847
1848    /* Sanity check writability of the store address.  */
1849    probe_write(env, a2, 1 << sc, mem_idx, ra);
1850
1851    /*
1852     * Note that the compare-and-swap is atomic, and the store is atomic,
1853     * but the complete operation is not.  Therefore we do not need to
1854     * assert serial context in order to implement this.  That said,
1855     * restart early if we can't support either operation that is supposed
1856     * to be atomic.
1857     */
1858    if (parallel) {
1859        uint32_t max = 2;
1860#ifdef CONFIG_ATOMIC64
1861        max = 3;
1862#endif
1863        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1864            (HAVE_ATOMIC128  ? 0 : sc > max)) {
1865            cpu_loop_exit_atomic(env_cpu(env), ra);
1866        }
1867    }
1868
1869    /* All loads happen before all stores.  For simplicity, load the entire
1870       store value area from the parameter list.  */
1871    svh = cpu_ldq_data_ra(env, pl + 16, ra);
1872    svl = cpu_ldq_data_ra(env, pl + 24, ra);
1873
1874    switch (fc) {
1875    case 0:
1876        {
1877            uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1878            uint32_t cv = env->regs[r3];
1879            uint32_t ov;
1880
1881            if (parallel) {
1882#ifdef CONFIG_USER_ONLY
1883                uint32_t *haddr = g2h(env_cpu(env), a1);
1884                ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1885#else
1886                TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1887                ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1888#endif
1889            } else {
1890                ov = cpu_ldl_data_ra(env, a1, ra);
1891                cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1892            }
1893            cc = (ov != cv);
1894            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1895        }
1896        break;
1897
1898    case 1:
1899        {
1900            uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1901            uint64_t cv = env->regs[r3];
1902            uint64_t ov;
1903
1904            if (parallel) {
1905#ifdef CONFIG_ATOMIC64
1906                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
1907                ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1908#else
1909                /* The cpu_loop_exit_atomic() test above excludes this case.  */
1910                g_assert_not_reached();
1911#endif
1912            } else {
1913                ov = cpu_ldq_data_ra(env, a1, ra);
1914                cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1915            }
1916            cc = (ov != cv);
1917            env->regs[r3] = ov;
1918        }
1919        break;
1920
1921    case 2:
1922        {
1923            uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1924            uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1925            Int128 nv = int128_make128(nvl, nvh);
1926            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1927            Int128 ov;
1928
1929            if (!parallel) {
1930                uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1931                uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1932
1933                ov = int128_make128(ol, oh);
1934                cc = !int128_eq(ov, cv);
1935                if (cc) {
1936                    nv = ov;
1937                }
1938
1939                cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1940                cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1941            } else if (HAVE_CMPXCHG128) {
1942                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1943                ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1944                cc = !int128_eq(ov, cv);
1945            } else {
1946                /* The cpu_loop_exit_atomic() test above excludes this case.  */
1947                g_assert_not_reached();
1948            }
1949
1950            env->regs[r3 + 0] = int128_gethi(ov);
1951            env->regs[r3 + 1] = int128_getlo(ov);
1952        }
1953        break;
1954
1955    default:
1956        g_assert_not_reached();
1957    }
1958
1959    /* Store only if the comparison succeeded.  Note that above we use a pair
1960       of 64-bit big-endian loads, so for sc < 3 we must extract the value
1961       from the most-significant bits of svh.  */
1962    if (cc == 0) {
1963        switch (sc) {
1964        case 0:
1965            cpu_stb_data_ra(env, a2, svh >> 56, ra);
1966            break;
1967        case 1:
1968            cpu_stw_data_ra(env, a2, svh >> 48, ra);
1969            break;
1970        case 2:
1971            cpu_stl_data_ra(env, a2, svh >> 32, ra);
1972            break;
1973        case 3:
1974            cpu_stq_data_ra(env, a2, svh, ra);
1975            break;
1976        case 4:
1977            if (!parallel) {
1978                cpu_stq_data_ra(env, a2 + 0, svh, ra);
1979                cpu_stq_data_ra(env, a2 + 8, svl, ra);
1980            } else if (HAVE_ATOMIC128) {
1981                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1982                Int128 sv = int128_make128(svl, svh);
1983                cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1984            } else {
1985                /* The cpu_loop_exit_atomic() test above excludes this case.  */
1986                g_assert_not_reached();
1987            }
1988            break;
1989        default:
1990            g_assert_not_reached();
1991        }
1992    }
1993
1994    return cc;
1995
1996 spec_exception:
1997    tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1998}
1999
2000uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
2001{
2002    return do_csst(env, r3, a1, a2, false);
2003}
2004
2005uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
2006                               uint64_t a2)
2007{
2008    return do_csst(env, r3, a1, a2, true);
2009}
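
/*
 * For reference, the parameter-list layout that do_csst() reads from the
 * quadword-aligned address in general register 1 (pl), as implied by the
 * offsets used above.  Smaller fc/sc values use the leftmost bytes of the
 * respective high doubleword (the loads are big-endian).  Documentation
 * sketch only; no such type exists in the code.
 */
struct example_csst_parmlist {
    uint64_t swap_hi;       /* pl + 0:  replacement value */
    uint64_t swap_lo;       /* pl + 8:  used for fc == 2 only */
    uint64_t store_hi;      /* pl + 16: store value (svh) */
    uint64_t store_lo;      /* pl + 24: used for sc == 4 only (svl) */
};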
2010
2011#if !defined(CONFIG_USER_ONLY)
2012void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2013{
2014    uintptr_t ra = GETPC();
2015    bool PERchanged = false;
2016    uint64_t src = a2;
2017    uint32_t i;
2018
2019    if (src & 0x7) {
2020        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2021    }
2022
2023    for (i = r1;; i = (i + 1) % 16) {
2024        uint64_t val = cpu_ldq_data_ra(env, src, ra);
2025        if (env->cregs[i] != val && i >= 9 && i <= 11) {
2026            PERchanged = true;
2027        }
2028        env->cregs[i] = val;
2029        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
2030                   i, src, val);
2031        src += sizeof(uint64_t);
2032
2033        if (i == r3) {
2034            break;
2035        }
2036    }
2037
2038    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2039        s390_cpu_recompute_watchpoints(env_cpu(env));
2040    }
2041
2042    tlb_flush(env_cpu(env));
2043}
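
/*
 * The r1..r3 walk above wraps modulo 16, so LCTLG with r1 == 14 and
 * r3 == 1 loads control registers 14, 15, 0 and 1.  A minimal sketch of
 * the register count implied by that loop (hypothetical helper):
 */
static inline int example_ctl_reg_count(uint32_t r1, uint32_t r3)
{
    return ((r3 - r1) & 15) + 1;    /* 14..1 -> ((1 - 14) & 15) + 1 = 4 */
}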
2044
2045void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2046{
2047    uintptr_t ra = GETPC();
2048    bool PERchanged = false;
2049    uint64_t src = a2;
2050    uint32_t i;
2051
2052    if (src & 0x3) {
2053        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2054    }
2055
2056    for (i = r1;; i = (i + 1) % 16) {
2057        uint32_t val = cpu_ldl_data_ra(env, src, ra);
2058        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
2059            PERchanged = true;
2060        }
2061        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
2062        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
2063        src += sizeof(uint32_t);
2064
2065        if (i == r3) {
2066            break;
2067        }
2068    }
2069
2070    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2071        s390_cpu_recompute_watchpoints(env_cpu(env));
2072    }
2073
2074    tlb_flush(env_cpu(env));
2075}
2076
2077void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2078{
2079    uintptr_t ra = GETPC();
2080    uint64_t dest = a2;
2081    uint32_t i;
2082
2083    if (dest & 0x7) {
2084        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2085    }
2086
2087    for (i = r1;; i = (i + 1) % 16) {
2088        cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2089        dest += sizeof(uint64_t);
2090
2091        if (i == r3) {
2092            break;
2093        }
2094    }
2095}
2096
2097void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2098{
2099    uintptr_t ra = GETPC();
2100    uint64_t dest = a2;
2101    uint32_t i;
2102
2103    if (dest & 0x3) {
2104        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2105    }
2106
2107    for (i = r1;; i = (i + 1) % 16) {
2108        cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2109        dest += sizeof(uint32_t);
2110
2111        if (i == r3) {
2112            break;
2113        }
2114    }
2115}
2116
2117uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2118{
2119    uintptr_t ra = GETPC();
2120    int i;
2121
2122    real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2123
2124    for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2125        cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2126    }
2127
2128    return 0;
2129}
2130
2131uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2132{
2133    S390CPU *cpu = env_archcpu(env);
2134    CPUState *cs = env_cpu(env);
2135
2136    /*
2137     * TODO: we currently don't handle all access protection types
2138     * (including access-list and key-controlled), nor AR mode.
2139     */
2140    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2141        /* Fetching permitted; storing permitted */
2142        return 0;
2143    }
2144
2145    if (env->int_pgm_code == PGM_PROTECTION) {
2146        /* retry if reading is possible */
2147        cs->exception_index = -1;
2148        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2149            /* Fetching permitted; storing not permitted */
2150            return 1;
2151        }
2152    }
2153
2154    switch (env->int_pgm_code) {
2155    case PGM_PROTECTION:
2156        /* Fetching not permitted; storing not permitted */
2157        cs->exception_index = -1;
2158        return 2;
2159    case PGM_ADDRESSING:
2160    case PGM_TRANS_SPEC:
2161        /* exceptions forwarded to the guest */
2162        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2163        return 0;
2164    }
2165
2166    /* Translation not available */
2167    cs->exception_index = -1;
2168    return 3;
2169}
2170
2171/* insert storage key extended */
2172uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2173{
2174    MachineState *ms = MACHINE(qdev_get_machine());
2175    static S390SKeysState *ss;
2176    static S390SKeysClass *skeyclass;
2177    uint64_t addr = wrap_address(env, r2);
2178    uint8_t key;
2179
2180    if (addr > ms->ram_size) {
2181        return 0;
2182    }
2183
2184    if (unlikely(!ss)) {
2185        ss = s390_get_skeys_device();
2186        skeyclass = S390_SKEYS_GET_CLASS(ss);
2187    }
2188
2189    if (skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key)) {
2190        return 0;
2191    }
2192    return key;
2193}
2194
2195/* set storage key extended */
2196void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2197{
2198    MachineState *ms = MACHINE(qdev_get_machine());
2199    static S390SKeysState *ss;
2200    static S390SKeysClass *skeyclass;
2201    uint64_t addr = wrap_address(env, r2);
2202    uint8_t key;
2203
2204    if (addr > ms->ram_size) {
2205        return;
2206    }
2207
2208    if (unlikely(!ss)) {
2209        ss = s390_get_skeys_device();
2210        skeyclass = S390_SKEYS_GET_CLASS(ss);
2211    }
2212
2213    key = (uint8_t) r1;
2214    skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2215    /*
2216     * As we can only flush by virtual address and not all the entries
2217     * that point to a physical address, we have to flush the whole TLB.
2218     */
2219    tlb_flush_all_cpus_synced(env_cpu(env));
2220}
2221
2222/* reset reference bit extended */
2223uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2224{
2225    MachineState *ms = MACHINE(qdev_get_machine());
2226    static S390SKeysState *ss;
2227    static S390SKeysClass *skeyclass;
2228    uint8_t re, key;
2229
2230    if (r2 > ms->ram_size) {
2231        return 0;
2232    }
2233
2234    if (unlikely(!ss)) {
2235        ss = s390_get_skeys_device();
2236        skeyclass = S390_SKEYS_GET_CLASS(ss);
2237    }
2238
2239    if (skeyclass->get_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
2240        return 0;
2241    }
2242
2243    re = key & (SK_R | SK_C);
2244    key &= ~SK_R;
2245
2246    if (skeyclass->set_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
2247        return 0;
2248    }
2249    /*
2250     * As we can only flush by virtual address and not all the entries
2251     * that point to a physical address, we have to flush the whole TLB.
2252     */
2253    tlb_flush_all_cpus_synced(env_cpu(env));
2254
2255    /*
2256     * cc
2257     *
2258     * 0  Reference bit zero; change bit zero
2259     * 1  Reference bit zero; change bit one
2260     * 2  Reference bit one; change bit zero
2261     * 3  Reference bit one; change bit one
2262     */
2263
2264    return re >> 1;
2265}
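
/*
 * A compile-time check of the "re >> 1" mapping above, assuming the
 * SK_R (0x04) and SK_C (0x02) bit definitions used alongside
 * "hw/s390x/storage-keys.h": the reference and change bits land
 * directly in the two low bits of the condition code.
 */
QEMU_BUILD_BUG_ON((SK_R >> 1) != 2 || (SK_C >> 1) != 1);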
2266
2267uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2268{
2269    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2270    S390Access srca, desta;
2271    uintptr_t ra = GETPC();
2272    int cc = 0;
2273
2274    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2275               __func__, l, a1, a2);
2276
2277    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2278        psw_as == AS_HOME || psw_as == AS_ACCREG) {
2279        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2280    }
2281
2282    l = wrap_length32(env, l);
2283    if (l > 256) {
2284        /* max 256 */
2285        l = 256;
2286        cc = 3;
2287    } else if (!l) {
2288        return cc;
2289    }
2290
2291    /* TODO: Access key handling */
2292    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2293    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2294    access_memmove(env, &desta, &srca, ra);
2295    return cc;
2296}
2297
2298uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2299{
2300    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2301    S390Access srca, desta;
2302    uintptr_t ra = GETPC();
2303    int cc = 0;
2304
2305    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2306               __func__, l, a1, a2);
2307
2308    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2309        psw_as == AS_HOME || psw_as == AS_ACCREG) {
2310        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2311    }
2312
2313    l = wrap_length32(env, l);
2314    if (l > 256) {
2315        /* max 256 */
2316        l = 256;
2317        cc = 3;
2318    } else if (!l) {
2319        return cc;
2320    }
2321
2322    /* TODO: Access key handling */
2323    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2324    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2325    access_memmove(env, &desta, &srca, ra);
2326    return cc;
2327}
2328
2329void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2330{
2331    CPUState *cs = env_cpu(env);
2332    const uintptr_t ra = GETPC();
2333    uint64_t table, entry, raddr;
2334    uint16_t entries, i, index = 0;
2335
2336    if (r2 & 0xff000) {
2337        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2338    }
2339
2340    if (!(r2 & 0x800)) {
2341        /* invalidation-and-clearing operation */
2342        table = r1 & ASCE_ORIGIN;
2343        entries = (r2 & 0x7ff) + 1;
2344
2345        switch (r1 & ASCE_TYPE_MASK) {
2346        case ASCE_TYPE_REGION1:
2347            index = (r2 >> 53) & 0x7ff;
2348            break;
2349        case ASCE_TYPE_REGION2:
2350            index = (r2 >> 42) & 0x7ff;
2351            break;
2352        case ASCE_TYPE_REGION3:
2353            index = (r2 >> 31) & 0x7ff;
2354            break;
2355        case ASCE_TYPE_SEGMENT:
2356            index = (r2 >> 20) & 0x7ff;
2357            break;
2358        }
2359        for (i = 0; i < entries; i++) {
2360            /* addresses are not wrapped in 24/31-bit mode, but the table index is */
2361            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2362            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2363            if (!(entry & REGION_ENTRY_I)) {
2364                /* we are allowed to not store if already invalid */
2365                entry |= REGION_ENTRY_I;
2366                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2367            }
2368        }
2369    }
2370
2371    /* We simply flush the complete tlb, therefore we can ignore r3. */
2372    if (m4 & 1) {
2373        tlb_flush(cs);
2374    } else {
2375        tlb_flush_all_cpus_synced(cs);
2376    }
2377}
2378
2379/* invalidate pte */
2380void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2381                  uint32_t m4)
2382{
2383    CPUState *cs = env_cpu(env);
2384    const uintptr_t ra = GETPC();
2385    uint64_t page = vaddr & TARGET_PAGE_MASK;
2386    uint64_t pte_addr, pte;
2387
2388    /* Compute the page table entry address */
2389    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2390    pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2391
2392    /* Mark the page table entry as invalid */
2393    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2394    pte |= PAGE_ENTRY_I;
2395    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2396
2397    /* XXX we exploit the fact that Linux passes the exact virtual
2398       address here - it's not obliged to! */
2399    if (m4 & 1) {
2400        if (vaddr & ~VADDR_PAGE_TX_MASK) {
2401            tlb_flush_page(cs, page);
2402            /* XXX 31-bit hack */
2403            tlb_flush_page(cs, page ^ 0x80000000);
2404        } else {
2405            /* looks like we don't have a valid virtual address */
2406            tlb_flush(cs);
2407        }
2408    } else {
2409        if (vaddr & ~VADDR_PAGE_TX_MASK) {
2410            tlb_flush_page_all_cpus_synced(cs, page);
2411            /* XXX 31-bit hack */
2412            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2413        } else {
2414            /* looks like we don't have a valid virtual address */
2415            tlb_flush_all_cpus_synced(cs);
2416        }
2417    }
2418}
2419
2420/* flush local tlb */
2421void HELPER(ptlb)(CPUS390XState *env)
2422{
2423    tlb_flush(env_cpu(env));
2424}
2425
2426/* flush global tlb */
2427void HELPER(purge)(CPUS390XState *env)
2428{
2429    tlb_flush_all_cpus_synced(env_cpu(env));
2430}
2431
2432/* load real address */
2433uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2434{
2435    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2436    uint64_t ret, tec;
2437    int flags, exc, cc;
2438
2439    /* XXX incomplete - has more corner cases */
2440    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2441        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2442    }
2443
2444    exc = mmu_translate(env, addr, 0, asc, &ret, &flags, &tec);
2445    if (exc) {
2446        cc = 3;
2447        ret = exc | 0x80000000;
2448    } else {
2449        cc = 0;
2450        ret |= addr & ~TARGET_PAGE_MASK;
2451    }
2452
2453    env->cc_op = cc;
2454    return ret;
2455}
2456#endif
2457
2458/* load pair from quadword */
2459uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2460{
2461    uintptr_t ra = GETPC();
2462    uint64_t hi, lo;
2463
2464    check_alignment(env, addr, 16, ra);
2465    hi = cpu_ldq_data_ra(env, addr + 0, ra);
2466    lo = cpu_ldq_data_ra(env, addr + 8, ra);
2467
2468    env->retxl = lo;
2469    return hi;
2470}
2471
2472uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2473{
2474    uintptr_t ra = GETPC();
2475    uint64_t hi, lo;
2476    int mem_idx;
2477    TCGMemOpIdx oi;
2478    Int128 v;
2479
2480    assert(HAVE_ATOMIC128);
2481
2482    mem_idx = cpu_mmu_index(env, false);
2483    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2484    v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2485    hi = int128_gethi(v);
2486    lo = int128_getlo(v);
2487
2488    env->retxl = lo;
2489    return hi;
2490}
2491
2492/* store pair to quadword */
2493void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2494                  uint64_t low, uint64_t high)
2495{
2496    uintptr_t ra = GETPC();
2497
2498    check_alignment(env, addr, 16, ra);
2499    cpu_stq_data_ra(env, addr + 0, high, ra);
2500    cpu_stq_data_ra(env, addr + 8, low, ra);
2501}
2502
2503void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2504                           uint64_t low, uint64_t high)
2505{
2506    uintptr_t ra = GETPC();
2507    int mem_idx;
2508    TCGMemOpIdx oi;
2509    Int128 v;
2510
2511    assert(HAVE_ATOMIC128);
2512
2513    mem_idx = cpu_mmu_index(env, false);
2514    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2515    v = int128_make128(low, high);
2516    cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2517}
2518
2519/* Execute instruction.  This instruction executes an insn modified with
2520   the contents of r1.  It does not change the executed instruction in memory;
2521   it does not change the program counter.
2522
2523   Perform this by recording the modified instruction in env->ex_value.
2524   This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2525*/
2526void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2527{
2528    uint64_t insn = cpu_lduw_code(env, addr);
2529    uint8_t opc = insn >> 8;
2530
2531    /* Or in the contents of R1[56:63].  */
2532    insn |= r1 & 0xff;
2533
2534    /* Load the rest of the instruction.  */
2535    insn <<= 48;
2536    switch (get_ilen(opc)) {
2537    case 2:
2538        break;
2539    case 4:
2540        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2541        break;
2542    case 6:
2543        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2544        break;
2545    default:
2546        g_assert_not_reached();
2547    }
2548
2549    /* The very most common cases can be sped up by avoiding a new TB.  */
2550    if ((opc & 0xf0) == 0xd0) {
2551        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2552                                      uint64_t, uintptr_t);
2553        static const dx_helper dx[16] = {
2554            [0x0] = do_helper_trt_bkwd,
2555            [0x2] = do_helper_mvc,
2556            [0x4] = do_helper_nc,
2557            [0x5] = do_helper_clc,
2558            [0x6] = do_helper_oc,
2559            [0x7] = do_helper_xc,
2560            [0xc] = do_helper_tr,
2561            [0xd] = do_helper_trt_fwd,
2562        };
2563        dx_helper helper = dx[opc & 0xf];
2564
2565        if (helper) {
2566            uint32_t l = extract64(insn, 48, 8);
2567            uint32_t b1 = extract64(insn, 44, 4);
2568            uint32_t d1 = extract64(insn, 32, 12);
2569            uint32_t b2 = extract64(insn, 28, 4);
2570            uint32_t d2 = extract64(insn, 16, 12);
2571            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2572            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2573
2574            env->cc_op = helper(env, l, a1, a2, 0);
2575            env->psw.addr += ilen;
2576            return;
2577        }
2578    } else if (opc == 0x0a) {
2579        env->int_svc_code = extract64(insn, 48, 8);
2580        env->int_svc_ilen = ilen;
2581        helper_exception(env, EXCP_SVC);
2582        g_assert_not_reached();
2583    }
2584
2585    /* Record the insn we want to execute as well as the ilen to use
2586       during the execution of the target insn.  This will also ensure
2587       that ex_value is non-zero, which flags that we are in a state
2588       that requires such execution.  */
2589    env->ex_value = insn | ilen;
2590}
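
/*
 * A sketch of the ex_value layout produced above, assuming the consumer
 * (the translator) splits it the same way: the modified insn text is
 * left-justified in the upper bytes, and the low bits carry the ilen of
 * the EXECUTE instruction itself.  Both helpers are hypothetical.
 */
static inline uint64_t example_pack_ex_value(uint64_t insn_left_justified,
                                             uint32_t ilen)
{
    return insn_left_justified | ilen;  /* as done at the end of HELPER(ex) */
}

static inline uint32_t example_ex_ilen(uint64_t ex_value)
{
    return ex_value & 0xf;              /* ilen is 2, 4 or 6 */
}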
2591
2592uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2593                       uint64_t len)
2594{
2595    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2596    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2597    const uint64_t r0 = env->regs[0];
2598    const uintptr_t ra = GETPC();
2599    uint8_t dest_key, dest_as, dest_k, dest_a;
2600    uint8_t src_key, src_as, src_k, src_a;
2601    uint64_t val;
2602    int cc = 0;
2603
2604    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2605               __func__, dest, src, len);
2606
2607    if (!(env->psw.mask & PSW_MASK_DAT)) {
2608        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2609    }
2610
2611    /* OAC (operand access control) for the first operand -> dest */
2612    val = (r0 & 0xffff0000ULL) >> 16;
2613    dest_key = (val >> 12) & 0xf;
2614    dest_as = (val >> 6) & 0x3;
2615    dest_k = (val >> 1) & 0x1;
2616    dest_a = val & 0x1;
2617
2618    /* OAC (operand access control) for the second operand -> src */
2619    val = (r0 & 0x0000ffffULL);
2620    src_key = (val >> 12) & 0xf;
2621    src_as = (val >> 6) & 0x3;
2622    src_k = (val >> 1) & 0x1;
2623    src_a = val & 0x1;
2624
2625    if (!dest_k) {
2626        dest_key = psw_key;
2627    }
2628    if (!src_k) {
2629        src_key = psw_key;
2630    }
2631    if (!dest_a) {
2632        dest_as = psw_as;
2633    }
2634    if (!src_a) {
2635        src_as = psw_as;
2636    }
2637
2638    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2639        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2640    }
2641    if (!(env->cregs[0] & CR0_SECONDARY) &&
2642        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2643        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2644    }
2645    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2646        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2647    }
2648
2649    len = wrap_length32(env, len);
2650    if (len > 4096) {
2651        cc = 3;
2652        len = 4096;
2653    }
2654
2655    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2656    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2657        (env->psw.mask & PSW_MASK_PSTATE)) {
2658        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2659                      __func__);
2660        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2661    }
2662
2663    /* FIXME: Access using correct keys and AR-mode */
2664    if (len) {
2665        S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
2666                                         mmu_idx_from_as(src_as), ra);
2667        S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
2668                                          mmu_idx_from_as(dest_as), ra);
2669
2670        access_memmove(env, &desta, &srca, ra);
2671    }
2672
2673    return cc;
2674}
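
/*
 * A host-side sketch of the operand-access-control (OAC) decoding that
 * HELPER(mvcos) performs on the two halfwords of r0 above.  The struct
 * and function are hypothetical; field semantics follow the bit
 * extraction in the helper (big-endian bit numbering in the comments).
 */
struct example_oac {
    uint8_t key;        /* bits 0-3 of the OAC halfword */
    uint8_t as;         /* address-space control, bits 8-9 */
    bool key_valid;     /* bit 14: use key instead of the PSW key */
    bool as_valid;      /* bit 15: use as instead of the PSW ASC */
};

static inline struct example_oac example_decode_oac(uint16_t oac)
{
    return (struct example_oac){
        .key       = (oac >> 12) & 0xf,
        .as        = (oac >> 6) & 0x3,
        .key_valid = (oac >> 1) & 0x1,
        .as_valid  = oac & 0x1,
    };
}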
2675
2676/* Decode a Unicode character.  A return value < 0 indicates success, storing
2677   the UTF-32 result into OCHAR and the consumed input length into OLEN.  A
2678   return value >= 0 indicates failure, and the CC value to be returned.  */
2679typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2680                                 uint64_t ilen, bool enh_check, uintptr_t ra,
2681                                 uint32_t *ochar, uint32_t *olen);
2682
2683/* Encode a Unicode character.  A return value < 0 indicates success, storing
2684   the bytes into ADDR and the output length into OLEN.  A return value >= 0
2685   indicates failure, and the CC value to be returned.  */
2686typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2687                                 uint64_t ilen, uintptr_t ra, uint32_t c,
2688                                 uint32_t *olen);
2689
2690static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2691                       bool enh_check, uintptr_t ra,
2692                       uint32_t *ochar, uint32_t *olen)
2693{
2694    uint8_t s0, s1, s2, s3;
2695    uint32_t c, l;
2696
2697    if (ilen < 1) {
2698        return 0;
2699    }
2700    s0 = cpu_ldub_data_ra(env, addr, ra);
2701    if (s0 <= 0x7f) {
2702        /* one byte character */
2703        l = 1;
2704        c = s0;
2705    } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2706        /* invalid character */
2707        return 2;
2708    } else if (s0 <= 0xdf) {
2709        /* two byte character */
2710        l = 2;
2711        if (ilen < 2) {
2712            return 0;
2713        }
2714        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2715        c = s0 & 0x1f;
2716        c = (c << 6) | (s1 & 0x3f);
2717        if (enh_check && (s1 & 0xc0) != 0x80) {
2718            return 2;
2719        }
2720    } else if (s0 <= 0xef) {
2721        /* three byte character */
2722        l = 3;
2723        if (ilen < 3) {
2724            return 0;
2725        }
2726        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2727        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2728        c = s0 & 0x0f;
2729        c = (c << 6) | (s1 & 0x3f);
2730        c = (c << 6) | (s2 & 0x3f);
2731        /* Fold the byte-by-byte range descriptions in the PoO into
2732           tests against the complete value.  It disallows encodings
2733           that could be smaller, and the UTF-16 surrogates.  */
2734        if (enh_check
2735            && ((s1 & 0xc0) != 0x80
2736                || (s2 & 0xc0) != 0x80
2737                || c < 0x800   /* overlong: 3-byte encodings start at U+0800 */
2738                || (c >= 0xd800 && c <= 0xdfff))) {
2739            return 2;
2740        }
2741    } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2742        /* four byte character */
2743        l = 4;
2744        if (ilen < 4) {
2745            return 0;
2746        }
2747        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2748        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2749        s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2750        c = s0 & 0x07;
2751        c = (c << 6) | (s1 & 0x3f);
2752        c = (c << 6) | (s2 & 0x3f);
2753        c = (c << 6) | (s3 & 0x3f);
2754        /* See above.  */
2755        if (enh_check
2756            && ((s1 & 0xc0) != 0x80
2757                || (s2 & 0xc0) != 0x80
2758                || (s3 & 0xc0) != 0x80
2759                || c < 0x010000
2760                || c > 0x10ffff)) {
2761            return 2;
2762        }
2763    } else {
2764        /* invalid character */
2765        return 2;
2766    }
2767
2768    *ochar = c;
2769    *olen = l;
2770    return -1;
2771}
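
/*
 * Worked example for the three-byte path above: the Euro sign U+20AC is
 * encoded as 0xe2 0x82 0xac, so c = (0x2 << 12) | (0x02 << 6) | 0x2c
 * = 0x20ac, with l = 3 bytes consumed.  The same arithmetic as a
 * host-side check (hypothetical helper, no range checking):
 */
static inline uint32_t example_decode_utf8_3byte(const uint8_t *s)
{
    return ((uint32_t)(s[0] & 0x0f) << 12)
           | ((uint32_t)(s[1] & 0x3f) << 6)
           | (uint32_t)(s[2] & 0x3f);
}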
2772
2773static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2774                        bool enh_check, uintptr_t ra,
2775                        uint32_t *ochar, uint32_t *olen)
2776{
2777    uint16_t s0, s1;
2778    uint32_t c, l;
2779
2780    if (ilen < 2) {
2781        return 0;
2782    }
2783    s0 = cpu_lduw_data_ra(env, addr, ra);
2784    if ((s0 & 0xfc00) != 0xd800) {
2785        /* one word character */
2786        l = 2;
2787        c = s0;
2788    } else {
2789        /* two word character */
2790        l = 4;
2791        if (ilen < 4) {
2792            return 0;
2793        }
2794        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2795        c = extract32(s0, 6, 4) + 1;
2796        c = (c << 6) | (s0 & 0x3f);
2797        c = (c << 10) | (s1 & 0x3ff);
2798        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2799            /* invalid surrogate character */
2800            return 2;
2801        }
2802    }
2803
2804    *ochar = c;
2805    *olen = l;
2806    return -1;
2807}
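
/*
 * Worked example for the surrogate path above: U+1F600 is the pair
 * 0xd83d 0xde00.  extract32(0xd83d, 6, 4) + 1 = 1, (1 << 6) | 0x3d =
 * 0x7d, and (0x7d << 10) | 0x200 = 0x1f600.  The same arithmetic as a
 * host-side check (hypothetical helper, no surrogate validation):
 */
static inline uint32_t example_decode_utf16_pair(uint16_t s0, uint16_t s1)
{
    uint32_t c = ((s0 >> 6) & 0xf) + 1;     /* uuuuu = wwww + 1 */

    c = (c << 6) | (s0 & 0x3f);
    return (c << 10) | (s1 & 0x3ff);
}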
2808
2809static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2810                        bool enh_check, uintptr_t ra,
2811                        uint32_t *ochar, uint32_t *olen)
2812{
2813    uint32_t c;
2814
2815    if (ilen < 4) {
2816        return 0;
2817    }
2818    c = cpu_ldl_data_ra(env, addr, ra);
2819    if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2820        /* invalid unicode character */
2821        return 2;
2822    }
2823
2824    *ochar = c;
2825    *olen = 4;
2826    return -1;
2827}
2828
2829static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2830                       uintptr_t ra, uint32_t c, uint32_t *olen)
2831{
2832    uint8_t d[4];
2833    uint32_t l, i;
2834
2835    if (c <= 0x7f) {
2836        /* one byte character */
2837        l = 1;
2838        d[0] = c;
2839    } else if (c <= 0x7ff) {
2840        /* two byte character */
2841        l = 2;
2842        d[1] = 0x80 | extract32(c, 0, 6);
2843        d[0] = 0xc0 | extract32(c, 6, 5);
2844    } else if (c <= 0xffff) {
2845        /* three byte character */
2846        l = 3;
2847        d[2] = 0x80 | extract32(c, 0, 6);
2848        d[1] = 0x80 | extract32(c, 6, 6);
2849        d[0] = 0xe0 | extract32(c, 12, 4);
2850    } else {
2851        /* four byte character */
2852        l = 4;
2853        d[3] = 0x80 | extract32(c, 0, 6);
2854        d[2] = 0x80 | extract32(c, 6, 6);
2855        d[1] = 0x80 | extract32(c, 12, 6);
2856        d[0] = 0xf0 | extract32(c, 18, 3);
2857    }
2858
2859    if (ilen < l) {
2860        return 1;
2861    }
2862    for (i = 0; i < l; ++i) {
2863        cpu_stb_data_ra(env, addr + i, d[i], ra);
2864    }
2865
2866    *olen = l;
2867    return -1;
2868}
2869
2870static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2871                        uintptr_t ra, uint32_t c, uint32_t *olen)
2872{
2873    uint16_t d0, d1;
2874
2875    if (c <= 0xffff) {
2876        /* one word character */
2877        if (ilen < 2) {
2878            return 1;
2879        }
2880        cpu_stw_data_ra(env, addr, c, ra);
2881        *olen = 2;
2882    } else {
2883        /* two word character */
2884        if (ilen < 4) {
2885            return 1;
2886        }
2887        d1 = 0xdc00 | extract32(c, 0, 10);
2888        d0 = 0xd800 | extract32(c, 10, 6);
2889        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2890        cpu_stw_data_ra(env, addr + 0, d0, ra);
2891        cpu_stw_data_ra(env, addr + 2, d1, ra);
2892        *olen = 4;
2893    }
2894
2895    return -1;
2896}
2897
2898static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2899                        uintptr_t ra, uint32_t c, uint32_t *olen)
2900{
2901    if (ilen < 4) {
2902        return 1;
2903    }
2904    cpu_stl_data_ra(env, addr, c, ra);
2905    *olen = 4;
2906    return -1;
2907}
2908
2909static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2910                                       uint32_t r2, uint32_t m3, uintptr_t ra,
2911                                       decode_unicode_fn decode,
2912                                       encode_unicode_fn encode)
2913{
2914    uint64_t dst = get_address(env, r1);
2915    uint64_t dlen = get_length(env, r1 + 1);
2916    uint64_t src = get_address(env, r2);
2917    uint64_t slen = get_length(env, r2 + 1);
2918    bool enh_check = m3 & 1;
2919    int cc, i;
2920
2921    /* Lest we fail to service interrupts in a timely manner, limit the
2922       amount of work we're willing to do.  For now, let's cap at 256.  */
2923    for (i = 0; i < 256; ++i) {
2924        uint32_t c, ilen, olen;
2925
2926        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2927        if (unlikely(cc >= 0)) {
2928            break;
2929        }
2930        cc = encode(env, dst, dlen, ra, c, &olen);
2931        if (unlikely(cc >= 0)) {
2932            break;
2933        }
2934
2935        src += ilen;
2936        slen -= ilen;
2937        dst += olen;
2938        dlen -= olen;
2939        cc = 3;
2940    }
2941
2942    set_address(env, r1, dst);
2943    set_length(env, r1 + 1, dlen);
2944    set_address(env, r2, src);
2945    set_length(env, r2 + 1, slen);
2946
2947    return cc;
2948}
2949
2950uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2951{
2952    return convert_unicode(env, r1, r2, m3, GETPC(),
2953                           decode_utf8, encode_utf16);
2954}
2955
2956uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2957{
2958    return convert_unicode(env, r1, r2, m3, GETPC(),
2959                           decode_utf8, encode_utf32);
2960}
2961
2962uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2963{
2964    return convert_unicode(env, r1, r2, m3, GETPC(),
2965                           decode_utf16, encode_utf8);
2966}
2967
2968uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2969{
2970    return convert_unicode(env, r1, r2, m3, GETPC(),
2971                           decode_utf16, encode_utf32);
2972}
2973
2974uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2975{
2976    return convert_unicode(env, r1, r2, m3, GETPC(),
2977                           decode_utf32, encode_utf8);
2978}
2979
2980uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2981{
2982    return convert_unicode(env, r1, r2, m3, GETPC(),
2983                           decode_utf32, encode_utf16);
2984}
2985
2986void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2987                        uintptr_t ra)
2988{
2989    /* test the actual access, not just any access to the page due to LAP */
2990    while (len) {
2991        const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2992        const uint64_t curlen = MIN(pagelen, len);
2993
2994        probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
2995        addr = wrap_address(env, addr + curlen);
2996        len -= curlen;
2997    }
2998}
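
/*
 * The pagelen expression above is a branch-free "bytes until the end of
 * the page": with a 4K page, TARGET_PAGE_MASK is ~0xfff, so
 * -(addr | ~0xfff) == 0x1000 - (addr & 0xfff).  A host-side check with
 * the mask spelled out (hypothetical helper):
 */
static inline uint64_t example_bytes_to_page_end(uint64_t addr)
{
    const uint64_t page_mask = ~(uint64_t)0xfff;    /* assumes 4K pages */

    return -(addr | page_mask);     /* e.g. addr 0xffe -> 2 */
}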
2999
3000void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
3001{
3002    probe_write_access(env, addr, len, GETPC());
3003}
3004