qemu/target/s390x/tcg/mem_helper.c
/*
 *  S/390 memory access helper routines
 *
 *  Copyright (c) 2009 Ulrich Hecht
 *  Copyright (c) 2009 Alexander Graf
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qemu/log.h"
#include "cpu.h"
#include "s390x-internal.h"
#include "tcg_s390x.h"
#include "exec/helper-proto.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "hw/core/tcg-cpu-ops.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "trace.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/s390x/storage-keys.h"
#include "hw/boards.h"
#endif

#ifdef CONFIG_USER_ONLY
# define user_or_likely(X)    true
#else
# define user_or_likely(X)    likely(X)
#endif

/*****************************************************************************/
/* Softmmu support */

/* #define DEBUG_HELPER */
#ifdef DEBUG_HELPER
#define HELPER_LOG(x...) qemu_log(x)
#else
#define HELPER_LOG(x...)
#endif

static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
{
    uint16_t pkm = env->cregs[3] >> 16;

    if (env->psw.mask & PSW_MASK_PSTATE) {
        /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
        return pkm & (0x8000 >> psw_key);
    }
    return true;
}
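
/*
 * For illustration: in problem state with PSW key 2, 0x8000 >> 2 is
 * 0x2000, so the key is permitted iff bit 2 (counting from the left) of
 * the 16-bit PKM in control register 3 is set.
 */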

static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
                                   uint64_t src, uint32_t len)
{
    if (!len || src == dest) {
        return false;
    }
    /* Take care of wrapping at the end of address space. */
    if (unlikely(wrap_address(env, src + len - 1) < src)) {
        return dest > src || dest <= wrap_address(env, src + len - 1);
    }
    return dest > src && dest <= src + len - 1;
}
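
/*
 * For illustration: dest == src + 1 with len == 2 is destructive (the
 * second byte is read after it was overwritten), while dest == src + len
 * is not; copies wrapping past the end of the address space are checked
 * against the wrapped end address instead.
 */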

/* Trigger a SPECIFICATION exception if an address or a length is not
   naturally aligned.  */
static inline void check_alignment(CPUS390XState *env, uint64_t v,
                                   int wordsize, uintptr_t ra)
{
    if (v % wordsize) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }
}

/* Load a value from memory according to its size.  */
static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
                                           int wordsize, uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        return cpu_ldub_data_ra(env, addr, ra);
    case 2:
        return cpu_lduw_data_ra(env, addr, ra);
    default:
        abort();
    }
}

/* Store a value to memory according to its size.  */
static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
                                      uint64_t value, int wordsize,
                                      uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        cpu_stb_data_ra(env, addr, value, ra);
        break;
    case 2:
        cpu_stw_data_ra(env, addr, value, ra);
        break;
    default:
        abort();
    }
}

/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;
    target_ulong vaddr2;
    void *haddr1;
    void *haddr2;
    uint16_t size1;
    uint16_t size2;
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers.
     */
    int mmu_idx;
} S390Access;
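
/*
 * For illustration, with 4K pages: a 300-byte access whose page offset
 * is 0xf00 leaves size1 == 0x100 (256) bytes on the first page and
 * size2 == 44 bytes on the second; vaddr2/haddr2 then describe the
 * second page.
 */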

/*
 * With nonfault=1, return the PGM_ exception that would have been injected
 * into the guest; return 0 if no exception was detected.
 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 */
static inline int s390_probe_access(CPUArchState *env, target_ulong addr,
                                    int size, MMUAccessType access_type,
                                    int mmu_idx, bool nonfault,
                                    void **phost, uintptr_t ra)
{
    int flags = probe_access_flags(env, addr, 0, access_type, mmu_idx,
                                   nonfault, phost, ra);

    if (unlikely(flags & TLB_INVALID_MASK)) {
#ifdef CONFIG_USER_ONLY
        /* Address is in TEC in system mode; see s390_cpu_record_sigsegv. */
        env->__excp_addr = addr & TARGET_PAGE_MASK;
        return (page_get_flags(addr) & PAGE_VALID
                ? PGM_PROTECTION : PGM_ADDRESSING);
#else
        return env->tlb_fill_exc;
#endif
    }

#ifndef CONFIG_USER_ONLY
    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif

    return 0;
}

static int access_prepare_nf(S390Access *access, CPUS390XState *env,
                             bool nonfault, vaddr vaddr1, int size,
                             MMUAccessType access_type,
                             int mmu_idx, uintptr_t ra)
{
    int size1, size2, exc;

    assert(size > 0 && size <= 4096);

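    /*
     * Note that -(vaddr1 | TARGET_PAGE_MASK) is the number of bytes from
     * vaddr1 to the end of its page; e.g. with 4K pages, a page offset
     * of 0xff0 yields 0x10 bytes on the first page.
     */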
    size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK));
    size2 = size - size1;

    memset(access, 0, sizeof(*access));
    access->vaddr1 = vaddr1;
    access->size1 = size1;
    access->size2 = size2;
    access->mmu_idx = mmu_idx;

    exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
                            &access->haddr1, ra);
    if (unlikely(exc)) {
        return exc;
    }
    if (unlikely(size2)) {
        /* The access crosses page boundaries. */
        vaddr vaddr2 = wrap_address(env, vaddr1 + size1);

        access->vaddr2 = vaddr2;
        exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
                                nonfault, &access->haddr2, ra);
        if (unlikely(exc)) {
            return exc;
        }
    }
    return 0;
}

static inline void access_prepare(S390Access *ret, CPUS390XState *env,
                                  vaddr vaddr, int size,
                                  MMUAccessType access_type, int mmu_idx,
                                  uintptr_t ra)
{
    int exc = access_prepare_nf(ret, env, false, vaddr, size,
                                access_type, mmu_idx, ra);
    assert(!exc);
}

/* Helper to handle memset on a single page. */
static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
                             uint8_t byte, uint16_t size, int mmu_idx,
                             uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    memset(haddr, byte, size);
#else
    if (likely(haddr)) {
        memset(haddr, byte, size);
    } else {
        MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
        for (int i = 0; i < size; i++) {
            cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
        }
    }
#endif
}

static void access_memset(CPUS390XState *env, S390Access *desta,
                          uint8_t byte, uintptr_t ra)
{
    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
                     desta->mmu_idx, ra);
    if (likely(!desta->size2)) {
        return;
    }
    do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
                     desta->mmu_idx, ra);
}

static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
                               int offset, uintptr_t ra)
{
    target_ulong vaddr = access->vaddr1;
    void *haddr = access->haddr1;

    if (unlikely(offset >= access->size1)) {
        offset -= access->size1;
        vaddr = access->vaddr2;
        haddr = access->haddr2;
    }

    if (user_or_likely(haddr)) {
        return ldub_p(haddr + offset);
    } else {
        MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
        return cpu_ldb_mmu(env, vaddr + offset, oi, ra);
    }
}

static void access_set_byte(CPUS390XState *env, S390Access *access,
                            int offset, uint8_t byte, uintptr_t ra)
{
    target_ulong vaddr = access->vaddr1;
    void *haddr = access->haddr1;

    if (unlikely(offset >= access->size1)) {
        offset -= access->size1;
        vaddr = access->vaddr2;
        haddr = access->haddr2;
    }

    if (user_or_likely(haddr)) {
        stb_p(haddr + offset, byte);
    } else {
        MemOpIdx oi = make_memop_idx(MO_UB, access->mmu_idx);
        cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
    }
}

/*
 * Move data with the same semantics as memmove() in case ranges don't overlap
 * or src > dest. Undefined behavior on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int len = desta->size1 + desta->size2;
    int diff;

    assert(len == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        for (i = 0; i < len; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

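    /*
     * Illustration of the diff > 0 case below (the destination's page
     * split comes later than the source's):
     *
     *   src:  |-- size1 --|-------- size2 --------|
     *   dest: |------ size1 ------|---- size2 ----|
     *
     * The first destination page takes all of srca->size1 plus the
     * leading 'diff' bytes of the source's second page.
     */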
    diff = desta->size1 - srca->size1;
    if (likely(diff == 0)) {
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (diff > 0) {
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        diff = -diff;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}

static int mmu_idx_from_as(uint8_t as)
{
    switch (as) {
    case AS_PRIMARY:
        return MMU_PRIMARY_IDX;
    case AS_SECONDARY:
        return MMU_SECONDARY_IDX;
    case AS_HOME:
        return MMU_HOME_IDX;
    default:
        /* FIXME AS_ACCREG */
        g_assert_not_reached();
    }
}

/* and on array */
static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* NC always processes one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) &
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}

/* xor on array */
static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* XC always processes one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /* xor with itself is the same as memset(0) */
    if (src == dest) {
        access_memset(env, &desta, 0, ra);
        return 0;
    }

    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}

/* or on array */
static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* OC always processes one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) |
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}

/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
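    /*
     * E.g. dest == src + 1: each iteration reads the byte stored by the
     * previous one, so the byte at src is replicated across the whole
     * destination; this classic MVC fill idiom is implemented below with
     * access_memset().
     */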
    if (dest == src + 1) {
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
    }

    return env->cc_op;
}

void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}

/* move right to left */
void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t ra = GETPC();
    S390Access srca, desta;
    int32_t i;

    /* MVCRL always copies one more byte than specified - maximum is 256 */
    l &= 0xff;
    l++;

    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    for (i = l - 1; i >= 0; i--) {
        uint8_t byte = access_get_byte(env, &srca, i, ra);
        access_set_byte(env, &desta, i, byte, ra);
    }
}

/* move inverse  */
void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVCIN always copies one more byte than specified - maximum is 256 */
    l++;

    src = wrap_address(env, src - l + 1);
    access_prepare(&srca, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* move numerics  */
void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVN always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
                          (access_get_byte(env, &srca2, i, ra) & 0xf0);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    access_prepare(&srca, env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}

/* move zones  */
void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVZ always copies one more byte than specified - maximum is 256 */
    l++;

    access_prepare(&srca1, env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&srca2, env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
                          (access_get_byte(env, &srca2, i, ra) & 0x0f);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* compare unsigned byte arrays */
static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
                              uint64_t s2, uintptr_t ra)
{
    uint32_t i;
    uint32_t cc = 0;

    HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
               __func__, l, s1, s2);

    for (i = 0; i <= l; i++) {
        uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
        uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
        HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
        if (x < y) {
            cc = 1;
            break;
        } else if (x > y) {
            cc = 2;
            break;
        }
    }

    HELPER_LOG("\n");
    return cc;
}

uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}

/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    if (!mask) {
        /* Recognize access exceptions for the first byte */
        probe_read(env, addr, 1, cpu_mmu_index(env, false), ra);
    }

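    /*
     * E.g. mask == 0xa (binary 1010) compares the first and third bytes
     * of r1 (counting from the left) with two successive bytes at addr.
     */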
    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            addr++;
        }
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}

static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}

/*
 * Store the address to the given register, zeroing out unused leftmost
 * bits in bit positions 32-63 (24-bit and 31-bit mode only).
 */
static inline void set_address_zero(CPUS390XState *env, int reg,
                                    uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            address &= 0x00ffffff;
        } else {
            address &= 0x7fffffff;
        }
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
    }
}

static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}
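
/*
 * For illustration, storing address 0x81234567 with the helpers above:
 * in 24-bit mode, set_address_zero() writes 0x00234567 to bits 32-63
 * while set_address() replaces only bits 40-63; in 31-bit mode, both
 * write 0x01234567; in 64-bit mode, both store the full 64-bit address.
 */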

static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        return (uint32_t)length;
    }
    return length;
}

static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        /* 24-Bit and 31-Bit mode */
        length &= 0x7fffffff;
    }
    return length;
}

static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}

static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = length;
    } else {
        /* 24-Bit and 31-Bit mode */
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
    }
}

/* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];

    /* Bits 32-55 of R0 must be zero.  */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

/* unsigned string compare (c is string terminator) */
Int128 HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                return int128_make128(s2, s1);
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            return int128_make128(s2 + len, s1 + len);
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    return int128_make128(s2 + len, s1 + len);
}

/* move page */
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
{
    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
    const int mmu_idx = cpu_mmu_index(env, false);
    const bool f = extract64(r0, 11, 1);
    const bool s = extract64(r0, 10, 1);
    const bool cco = extract64(r0, 8, 1);
    uintptr_t ra = GETPC();
    S390Access srca, desta;
    int exc;

    if ((f && s) || extract64(r0, 12, 4)) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    /*
     * We always manually handle exceptions such that we can properly store
     * r1/r2 to the lowcore on page-translation exceptions.
     *
     * TODO: Access key handling
     */
    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
                            MMU_DATA_LOAD, mmu_idx, ra);
    if (exc) {
        if (cco) {
            return 2;
        }
        goto inject_exc;
    }
    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
                            MMU_DATA_STORE, mmu_idx, ra);
    if (exc) {
        if (cco && exc != PGM_PROTECTION) {
            return 1;
        }
        goto inject_exc;
    }
    access_memmove(env, &desta, &srca, ra);
    return 0; /* data moved */
inject_exc:
#if !defined(CONFIG_USER_ONLY)
    if (exc != PGM_ADDRESSING) {
        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
                 env->tlb_fill_tec);
    }
    if (exc == PGM_PAGE_TRANS) {
        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
                 r1 << 4 | r2);
    }
#endif
    tcg_s390_program_interrupt(env, exc, ra);
}

/* string copy */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    const uint8_t c = env->regs[0];
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    access_prepare(&srca, env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    access_prepare(&desta, env, d, len, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}

/* load access registers r1 to r3 from memory at a2 */
void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* store access registers r1 to r3 in memory at a2 */
void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        cpu_stl_data_ra(env, a2, env->aregs[i], ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* move long helper */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        access_prepare(&srca, env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        access_prepare(&desta, env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);

        /* The remaining length selects the padding byte. */
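        /*
         * E.g. pad == 0xabcd writes ..., 0xab, 0xcd; the final byte
         * (written when *destlen is 1) is always the low pad byte, so
         * the halfword pattern is aligned to the end of the field.
         */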
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        *dest = wrap_address(env, *dest + len);
    }

    return *destlen ? 3 : cc;
}

/* move long */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memset(env, &desta, pad, ra);
        } else {
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            access_prepare(&srca, env, src, cur_len,
                           MMU_DATA_LOAD, mmu_idx, ra);
            access_prepare(&desta, env, dest, cur_len,
                           MMU_DATA_STORE, mmu_idx, ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}

/* move long extended */
uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* move long unicode */
uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* compare logical long helper */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

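    /*
     * Once an operand is exhausted, its elements compare as 'pad'; e.g.
     * comparing "ab" against "ab " with pad == ' ' still ends equal
     * with cc == 0.
     */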
    for (; len; len -= wordsize) {
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}

/* compare logical long */
uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
    uint64_t src3 = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);

    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
    set_address(env, r1, src1);
    set_address(env, r2, src3);

    return cc;
}

/* compare logical long extended memcompare insn with padding */
uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* compare logical long unicode memcompare insn with padding */
uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc = 0;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* checksum */
Int128 HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                    uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
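    /* E.g. 0x1ffffffff folds to 0x100000000, which folds again to 1. */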
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    return int128_make128(cksm, len);
}

void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
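    /* e.g. a source byte 0x12 is stored as 0x21 */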
1377    b = cpu_ldub_data_ra(env, src, ra);
1378    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1379    src--;
1380    len_src--;
1381
1382    /* now pack every value */
1383    while (len_dest > 0) {
1384        b = 0;
1385
1386        if (len_src >= 0) {
1387            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1388            src--;
1389            len_src--;
1390        }
1391        if (len_src >= 0) {
1392            b |= cpu_ldub_data_ra(env, src, ra) << 4;
1393            src--;
1394            len_src--;
1395        }
1396
1397        len_dest--;
1398        dest--;
1399        cpu_stb_data_ra(env, dest, b, ra);
1400    }
1401}
1402
1403static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
1404                           uint32_t srclen, int ssize, uintptr_t ra)
1405{
1406    int i;
1407    /* The destination operand is always 16 bytes long.  */
1408    const int destlen = 16;
1409
1410    /* The operands are processed from right to left.  */
1411    src += srclen - 1;
1412    dest += destlen - 1;
1413
1414    for (i = 0; i < destlen; i++) {
1415        uint8_t b = 0;
1416
1417        /* Start with a positive sign */
1418        if (i == 0) {
1419            b = 0xc;
1420        } else if (srclen > ssize) {
1421            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1422            src -= ssize;
1423            srclen -= ssize;
1424        }
1425
1426        if (srclen > ssize) {
1427            b |= cpu_ldub_data_ra(env, src, ra) << 4;
1428            src -= ssize;
1429            srclen -= ssize;
1430        }
1431
1432        cpu_stb_data_ra(env, dest, b, ra);
1433        dest--;
1434    }
1435}
1436
1437
1438void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1439                 uint32_t srclen)
1440{
1441    do_pkau(env, dest, src, srclen, 1, GETPC());
1442}
1443
1444void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1445                 uint32_t srclen)
1446{
1447    do_pkau(env, dest, src, srclen, 2, GETPC());
1448}
1449
1450void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1451                  uint64_t src)
1452{
1453    uintptr_t ra = GETPC();
1454    int len_dest = len >> 4;
1455    int len_src = len & 0xf;
1456    uint8_t b;
1457    int second_nibble = 0;
1458
1459    dest += len_dest;
1460    src += len_src;
1461
1462    /* last byte is special, it only flips the nibbles */
1463    b = cpu_ldub_data_ra(env, src, ra);
1464    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1465    src--;
1466    len_src--;
1467
1468    /* now pad every nibble with 0xf0 */
1469
1470    while (len_dest > 0) {
1471        uint8_t cur_byte = 0;
1472
1473        if (len_src > 0) {
1474            cur_byte = cpu_ldub_data_ra(env, src, ra);
1475        }
1476
1477        len_dest--;
1478        dest--;
1479
1480        /* only advance one nibble at a time */
1481        if (second_nibble) {
1482            cur_byte >>= 4;
1483            len_src--;
1484            src--;
1485        }
1486        second_nibble = !second_nibble;
1487
1488        /* digit */
1489        cur_byte = (cur_byte & 0xf);
1490        /* zone bits */
1491        cur_byte |= 0xf0;
1492
1493        cpu_stb_data_ra(env, dest, cur_byte, ra);
1494    }
1495}
1496
1497static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1498                                 uint32_t destlen, int dsize, uint64_t src,
1499                                 uintptr_t ra)
1500{
1501    int i;
1502    uint32_t cc;
1503    uint8_t b;
1504    /* The source operand is always 16 bytes long.  */
1505    const int srclen = 16;
1506
1507    /* The operands are processed from right to left.  */
1508    src += srclen - 1;
1509    dest += destlen - dsize;
1510
1511    /* Check for the sign.  */
1512    b = cpu_ldub_data_ra(env, src, ra);
1513    src--;
1514    switch (b & 0xf) {
1515    case 0xa:
1516    case 0xc:
1517    case 0xe ... 0xf:
1518        cc = 0;  /* plus */
1519        break;
1520    case 0xb:
1521    case 0xd:
1522        cc = 1;  /* minus */
1523        break;
1524    default:
1525    case 0x0 ... 0x9:
1526        cc = 3;  /* invalid */
1527        break;
1528    }
1529
1530    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1531    for (i = 0; i < destlen; i += dsize) {
1532        if (i == (31 * dsize)) {
1533            /* If length is 32/64 bytes, the leftmost byte is 0. */
1534            b = 0;
1535        } else if (i % (2 * dsize)) {
1536            b = cpu_ldub_data_ra(env, src, ra);
1537            src--;
1538        } else {
1539            b >>= 4;
1540        }
1541        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1542        dest -= dsize;
1543    }
1544
1545    return cc;
1546}
1547
1548uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1549                       uint64_t src)
1550{
1551    return do_unpkau(env, dest, destlen, 1, src, GETPC());
1552}
1553
1554uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1555                       uint64_t src)
1556{
1557    return do_unpkau(env, dest, destlen, 2, src, GETPC());
1558}
1559
1560uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1561{
1562    uintptr_t ra = GETPC();
1563    uint32_t cc = 0;
1564    int i;
1565
1566    for (i = 0; i < destlen; i++) {
1567        uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1568        /* digit */
1569        cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1570
1571        if (i == (destlen - 1)) {
1572            /* sign */
1573            cc |= (b & 0xf) < 0xa ? 1 : 0;
1574        } else {
1575            /* digit */
1576            cc |= (b & 0xf) > 0x9 ? 2 : 0;
1577        }
1578    }
1579
1580    return cc;
1581}
1582
1583static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1584                             uint64_t trans, uintptr_t ra)
1585{
1586    uint32_t i;
1587
1588    for (i = 0; i <= len; i++) {
1589        uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1590        uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1591        cpu_stb_data_ra(env, array + i, new_byte, ra);
1592    }
1593
1594    return env->cc_op;
1595}
1596
1597void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1598                uint64_t trans)
1599{
1600    do_helper_tr(env, len, array, trans, GETPC());
1601}
1602
1603Int128 HELPER(tre)(CPUS390XState *env, uint64_t array,
1604                   uint64_t len, uint64_t trans)
1605{
1606    uintptr_t ra = GETPC();
1607    uint8_t end = env->regs[0] & 0xff;
1608    uint64_t l = len;
1609    uint64_t i;
1610    uint32_t cc = 0;
1611
1612    if (!(env->psw.mask & PSW_MASK_64)) {
1613        array &= 0x7fffffff;
1614        l = (uint32_t)l;
1615    }
1616
1617    /* Lest we fail to service interrupts in a timely manner, limit the
1618       amount of work we're willing to do.  For now, let's cap at 8k.  */
1619    if (l > 0x2000) {
1620        l = 0x2000;
1621        cc = 3;
1622    }
1623
1624    for (i = 0; i < l; i++) {
1625        uint8_t byte, new_byte;
1626
1627        byte = cpu_ldub_data_ra(env, array + i, ra);
1628
1629        if (byte == end) {
1630            cc = 1;
1631            break;
1632        }
1633
1634        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1635        cpu_stb_data_ra(env, array + i, new_byte, ra);
1636    }
1637
1638    env->cc_op = cc;
1639    return int128_make128(len - i, array + i);
1640}
1641
1642static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1643                                     uint64_t array, uint64_t trans,
1644                                     int inc, uintptr_t ra)
1645{
1646    int i;
1647
1648    for (i = 0; i <= len; i++) {
1649        uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1650        uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1651
1652        if (sbyte != 0) {
1653            set_address(env, 1, array + i * inc);
1654            env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1655            return (i == len) ? 2 : 1;
1656        }
1657    }
1658
1659    return 0;
1660}
1661
1662static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1663                                  uint64_t array, uint64_t trans,
1664                                  uintptr_t ra)
1665{
1666    return do_helper_trt(env, len, array, trans, 1, ra);
1667}
1668
1669uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1670                     uint64_t trans)
1671{
1672    return do_helper_trt(env, len, array, trans, 1, GETPC());
1673}
1674
1675static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1676                                   uint64_t array, uint64_t trans,
1677                                   uintptr_t ra)
1678{
1679    return do_helper_trt(env, len, array, trans, -1, ra);
1680}
1681
1682uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1683                      uint64_t trans)
1684{
1685    return do_helper_trt(env, len, array, trans, -1, GETPC());
1686}
1687
1688/* Translate one/two to one/two */
1689uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1690                      uint32_t tst, uint32_t sizes)
1691{
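        /*
         * SIZES is assumed to encode the operand widths chosen by the
         * translator: bit 0 set selects a one-byte destination, bit 1 set
         * a one-byte source; so TROO=3, TROT=2, TRTO=1, TRTT=0.
         */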
1692    uintptr_t ra = GETPC();
1693    int dsize = (sizes & 1) ? 1 : 2;
1694    int ssize = (sizes & 2) ? 1 : 2;
1695    uint64_t tbl = get_address(env, 1);
1696    uint64_t dst = get_address(env, r1);
1697    uint64_t len = get_length(env, r1 + 1);
1698    uint64_t src = get_address(env, r2);
1699    uint32_t cc = 3;
1700    int i;
1701
1702    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1703       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1704       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1705    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1706        tbl &= -4096;
1707    } else {
1708        tbl &= -8;
1709    }
1710
1711    check_alignment(env, len, ssize, ra);
1712
1713    /* Lest we fail to service interrupts in a timely manner, limit the
1714       amount of work we're willing to do.  */
1715    for (i = 0; i < 0x2000; i++) {
1716        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1717        uint64_t tble = tbl + (sval * dsize);
1718        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1719        if (dval == tst) {
1720            cc = 1;
1721            break;
1722        }
1723        cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1724
1725        len -= ssize;
1726        src += ssize;
1727        dst += dsize;
1728
1729        if (len == 0) {
1730            cc = 0;
1731            break;
1732        }
1733    }
1734
1735    set_address(env, r1, dst);
1736    set_length(env, r1 + 1, len);
1737    set_address(env, r2, src);
1738
1739    return cc;
1740}
1741
1742static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1743                        uint64_t a2, bool parallel)
1744{
1745    uint32_t mem_idx = cpu_mmu_index(env, false);
1746    MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, mem_idx);
1747    MemOpIdx oi8 = make_memop_idx(MO_TE | MO_64, mem_idx);
1748    MemOpIdx oi4 = make_memop_idx(MO_TE | MO_32, mem_idx);
1749    MemOpIdx oi2 = make_memop_idx(MO_TE | MO_16, mem_idx);
1750    MemOpIdx oi1 = make_memop_idx(MO_8, mem_idx);
1751    uintptr_t ra = GETPC();
1752    uint32_t fc = extract32(env->regs[0], 0, 8);
1753    uint32_t sc = extract32(env->regs[0], 8, 8);
1754    uint64_t pl = get_address(env, 1) & -16;
1755    uint64_t svh, svl;
1756    uint32_t cc;
1757
1758    /* Sanity check the function code and storage characteristic.  */
1759    if (fc > 1 || sc > 3) {
1760        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1761            goto spec_exception;
1762        }
1763        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1764            goto spec_exception;
1765        }
1766    }
1767
1768    /* Sanity check the alignments.  */
1769    if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1770        goto spec_exception;
1771    }
1772
1773    /* Sanity check writability of the store address.  */
1774    probe_write(env, a2, 1 << sc, mem_idx, ra);
1775
1776    /*
1777     * Note that the compare-and-swap is atomic, and the store is atomic,
1778     * but the complete operation is not.  Therefore we do not need to
1779     * assert serial context in order to implement this.  That said,
1780     * restart early if we can't support either operation that is supposed
1781     * to be atomic.
1782     */
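        /*
         * For reference: fc + 2 is log2 of the compare-and-swap size in
         * bytes (fc=0 -> 4, fc=1 -> 8, fc=2 -> 16), and sc is log2 of the
         * store size, so "max" is the largest log2 size for which the host
         * has a guaranteed-atomic primitive.
         */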
1783    if (parallel) {
1784        uint32_t max = 2;
1785#ifdef CONFIG_ATOMIC64
1786        max = 3;
1787#endif
1788        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1789            (HAVE_ATOMIC128_RW ? 0 : sc > max)) {
1790            cpu_loop_exit_atomic(env_cpu(env), ra);
1791        }
1792    }
1793
1794    /*
1795     * All loads happen before all stores.  For simplicity, load the entire
1796     * store value area from the parameter list.
1797     */
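        /* Assumed layout of the 32-byte parameter list addressed by GR1,
           matching the loads below: replacement value in bytes 0-15,
           store value in bytes 16-31.  */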
1798    svh = cpu_ldq_mmu(env, pl + 16, oi8, ra);
1799    svl = cpu_ldq_mmu(env, pl + 24, oi8, ra);
1800
1801    switch (fc) {
1802    case 0:
1803        {
1804            uint32_t nv = cpu_ldl_mmu(env, pl, oi4, ra);
1805            uint32_t cv = env->regs[r3];
1806            uint32_t ov;
1807
1808            if (parallel) {
1809                ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi4, ra);
1810            } else {
1811                ov = cpu_ldl_mmu(env, a1, oi4, ra);
1812                cpu_stl_mmu(env, a1, (ov == cv ? nv : ov), oi4, ra);
1813            }
1814            cc = (ov != cv);
1815            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1816        }
1817        break;
1818
1819    case 1:
1820        {
1821            uint64_t nv = cpu_ldq_mmu(env, pl, oi8, ra);
1822            uint64_t cv = env->regs[r3];
1823            uint64_t ov;
1824
1825            if (parallel) {
1826#ifdef CONFIG_ATOMIC64
1827                ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi8, ra);
1828#else
1829                /* Note that we asserted !parallel above.  */
1830                g_assert_not_reached();
1831#endif
1832            } else {
1833                ov = cpu_ldq_mmu(env, a1, oi8, ra);
1834                cpu_stq_mmu(env, a1, (ov == cv ? nv : ov), oi8, ra);
1835            }
1836            cc = (ov != cv);
1837            env->regs[r3] = ov;
1838        }
1839        break;
1840
1841    case 2:
1842        {
1843            Int128 nv = cpu_ld16_mmu(env, pl, oi16, ra);
1844            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1845            Int128 ov;
1846
1847            if (!parallel) {
1848                ov = cpu_ld16_mmu(env, a1, oi16, ra);
1849                cc = !int128_eq(ov, cv);
1850                if (cc) {
1851                    nv = ov;
1852                }
1853                cpu_st16_mmu(env, a1, nv, oi16, ra);
1854            } else if (HAVE_CMPXCHG128) {
1855                ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi16, ra);
1856                cc = !int128_eq(ov, cv);
1857            } else {
1858                /* Note that we asserted !parallel above.  */
1859                g_assert_not_reached();
1860            }
1861
1862            env->regs[r3 + 0] = int128_gethi(ov);
1863            env->regs[r3 + 1] = int128_getlo(ov);
1864        }
1865        break;
1866
1867    default:
1868        g_assert_not_reached();
1869    }
1870
1871    /* Store only if the comparison succeeded.  Note that above we use a pair
1872       of 64-bit big-endian loads, so for sc < 3 we must extract the value
1873       from the most-significant bits of svh.  */
1874    if (cc == 0) {
1875        switch (sc) {
1876        case 0:
1877            cpu_stb_mmu(env, a2, svh >> 56, oi1, ra);
1878            break;
1879        case 1:
1880            cpu_stw_mmu(env, a2, svh >> 48, oi2, ra);
1881            break;
1882        case 2:
1883            cpu_stl_mmu(env, a2, svh >> 32, oi4, ra);
1884            break;
1885        case 3:
1886            cpu_stq_mmu(env, a2, svh, oi8, ra);
1887            break;
1888        case 4:
1889            cpu_st16_mmu(env, a2, int128_make128(svl, svh), oi16, ra);
1890            break;
1891        default:
1892            g_assert_not_reached();
1893        }
1894    }
1895
1896    return cc;
1897
1898 spec_exception:
1899    tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1900}
1901
1902uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1903{
1904    return do_csst(env, r3, a1, a2, false);
1905}
1906
1907uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1908                               uint64_t a2)
1909{
1910    return do_csst(env, r3, a1, a2, true);
1911}
1912
1913#if !defined(CONFIG_USER_ONLY)
1914void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1915{
1916    uintptr_t ra = GETPC();
1917    bool PERchanged = false;
1918    uint64_t src = a2;
1919    uint32_t i;
1920
1921    if (src & 0x7) {
1922        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1923    }
1924
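        /* The register range wraps modulo 16; e.g. r1=14, r3=1 loads
           control registers 14, 15, 0 and 1.  */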
1925    for (i = r1;; i = (i + 1) % 16) {
1926        uint64_t val = cpu_ldq_data_ra(env, src, ra);
1927        if (env->cregs[i] != val && i >= 9 && i <= 11) {
1928            PERchanged = true;
1929        }
1930        env->cregs[i] = val;
1931        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
1932                   i, src, val);
1933        src += sizeof(uint64_t);
1934
1935        if (i == r3) {
1936            break;
1937        }
1938    }
1939
1940    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1941        s390_cpu_recompute_watchpoints(env_cpu(env));
1942    }
1943
1944    tlb_flush(env_cpu(env));
1945}
1946
1947void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1948{
1949    uintptr_t ra = GETPC();
1950    bool PERchanged = false;
1951    uint64_t src = a2;
1952    uint32_t i;
1953
1954    if (src & 0x3) {
1955        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1956    }
1957
1958    for (i = r1;; i = (i + 1) % 16) {
1959        uint32_t val = cpu_ldl_data_ra(env, src, ra);
1960        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
1961            PERchanged = true;
1962        }
1963        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
1964        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
1965        src += sizeof(uint32_t);
1966
1967        if (i == r3) {
1968            break;
1969        }
1970    }
1971
1972    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1973        s390_cpu_recompute_watchpoints(env_cpu(env));
1974    }
1975
1976    tlb_flush(env_cpu(env));
1977}
1978
1979void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1980{
1981    uintptr_t ra = GETPC();
1982    uint64_t dest = a2;
1983    uint32_t i;
1984
1985    if (dest & 0x7) {
1986        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1987    }
1988
1989    for (i = r1;; i = (i + 1) % 16) {
1990        cpu_stq_data_ra(env, dest, env->cregs[i], ra);
1991        dest += sizeof(uint64_t);
1992
1993        if (i == r3) {
1994            break;
1995        }
1996    }
1997}
1998
1999void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2000{
2001    uintptr_t ra = GETPC();
2002    uint64_t dest = a2;
2003    uint32_t i;
2004
2005    if (dest & 0x3) {
2006        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2007    }
2008
2009    for (i = r1;; i = (i + 1) % 16) {
2010        cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2011        dest += sizeof(uint32_t);
2012
2013        if (i == r3) {
2014            break;
2015        }
2016    }
2017}
2018
2019uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2020{
2021    uintptr_t ra = GETPC();
2022    int i;
2023
2024    real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2025
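        /* Zero the complete 4K block; cc 0 (always returned below) reports
           the block as usable.  */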
2026    for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2027        cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2028    }
2029
2030    return 0;
2031}
2032
2033uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2034{
2035    S390CPU *cpu = env_archcpu(env);
2036    CPUState *cs = env_cpu(env);
2037
2038    /*
2039     * TODO: we currently don't handle all access protection types
2040     * (including access-list and key-controlled) as well as AR mode.
2041     */
2042    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2043        /* Fetching permitted; storing permitted */
2044        return 0;
2045    }
2046
2047    if (env->int_pgm_code == PGM_PROTECTION) {
2048        /* retry if reading is possible */
2049        cs->exception_index = -1;
2050        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2051            /* Fetching permitted; storing not permitted */
2052            return 1;
2053        }
2054    }
2055
2056    switch (env->int_pgm_code) {
2057    case PGM_PROTECTION:
2058        /* Fetching not permitted; storing not permitted */
2059        cs->exception_index = -1;
2060        return 2;
2061    case PGM_ADDRESSING:
2062    case PGM_TRANS_SPEC:
2063        /* exceptions forwarded to the guest */
2064        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2065        return 0;
2066    }
2067
2068    /* Translation not available */
2069    cs->exception_index = -1;
2070    return 3;
2071}
2072
2073/* insert storage key extended */
2074uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2075{
2076    static S390SKeysState *ss;
2077    static S390SKeysClass *skeyclass;
2078    uint64_t addr = wrap_address(env, r2);
2079    uint8_t key;
2080    int rc;
2081
2082    addr = mmu_real2abs(env, addr);
2083    if (!mmu_absolute_addr_valid(addr, false)) {
2084        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2085    }
2086
2087    if (unlikely(!ss)) {
2088        ss = s390_get_skeys_device();
2089        skeyclass = S390_SKEYS_GET_CLASS(ss);
2090        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
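                /*
                 * Presumably, translations formed while storage keys were
                 * unused ignored key-controlled protection, so they must be
                 * flushed once keys come into use.
                 */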
2091            tlb_flush_all_cpus_synced(env_cpu(env));
2092        }
2093    }
2094
2095    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2096    if (rc) {
2097        trace_get_skeys_nonzero(rc);
2098        return 0;
2099    }
2100    return key;
2101}
2102
2103/* set storage key extended */
2104void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2105{
2106    static S390SKeysState *ss;
2107    static S390SKeysClass *skeyclass;
2108    uint64_t addr = wrap_address(env, r2);
2109    uint8_t key;
2110    int rc;
2111
2112    addr = mmu_real2abs(env, addr);
2113    if (!mmu_absolute_addr_valid(addr, false)) {
2114        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2115    }
2116
2117    if (unlikely(!ss)) {
2118        ss = s390_get_skeys_device();
2119        skeyclass = S390_SKEYS_GET_CLASS(ss);
2120        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2121            tlb_flush_all_cpus_synced(env_cpu(env));
2122        }
2123    }
2124
2125    key = r1 & 0xfe;
2126    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2127    if (rc) {
2128        trace_set_skeys_nonzero(rc);
2129    }
2130    /*
2131     * As we can only flush TLB entries by virtual address, and not all the
2132     * entries that map a given physical address, we have to flush the whole TLB.
2133     */
2134    tlb_flush_all_cpus_synced(env_cpu(env));
2135}
2136
2137/* reset reference bit extended */
2138uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2139{
2140    uint64_t addr = wrap_address(env, r2);
2141    static S390SKeysState *ss;
2142    static S390SKeysClass *skeyclass;
2143    uint8_t re, key;
2144    int rc;
2145
2146    addr = mmu_real2abs(env, addr);
2147    if (!mmu_absolute_addr_valid(addr, false)) {
2148        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
2149    }
2150
2151    if (unlikely(!ss)) {
2152        ss = s390_get_skeys_device();
2153        skeyclass = S390_SKEYS_GET_CLASS(ss);
2154        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
2155            tlb_flush_all_cpus_synced(env_cpu(env));
2156        }
2157    }
2158
2159    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2160    if (rc) {
2161        trace_get_skeys_nonzero(rc);
2162        return 0;
2163    }
2164
2165    re = key & (SK_R | SK_C);
2166    key &= ~SK_R;
2167
2168    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2169    if (rc) {
2170        trace_set_skeys_nonzero(rc);
2171        return 0;
2172    }
2173    /*
2174     * As we can only flush TLB entries by virtual address, and not all the
2175     * entries that map a given physical address, we have to flush the whole TLB.
2176     */
2177    tlb_flush_all_cpus_synced(env_cpu(env));
2178
2179    /*
2180     * cc
2181     *
2182     * 0  Reference bit zero; change bit zero
2183     * 1  Reference bit zero; change bit one
2184     * 2  Reference bit one; change bit zero
2185     * 3  Reference bit one; change bit one
2186     */
2187
2188    return re >> 1;
2189}
2190
2191uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2192                      uint64_t key)
2193{
2194    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2195    S390Access srca, desta;
2196    uintptr_t ra = GETPC();
2197    int cc = 0;
2198
2199    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2200               __func__, l, a1, a2);
2201
2202    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2203        psw_as == AS_HOME || psw_as == AS_ACCREG) {
2204        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2205    }
2206
2207    if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2208        s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2209    }
2210
2211    l = wrap_length32(env, l);
2212    if (l > 256) {
2213        /* max 256 */
2214        l = 256;
2215        cc = 3;
2216    } else if (!l) {
2217        return cc;
2218    }
2219
2220    access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2221    access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2222    access_memmove(env, &desta, &srca, ra);
2223    return cc;
2224}
2225
2226uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2,
2227                      uint64_t key)
2228{
2229    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2230    S390Access srca, desta;
2231    uintptr_t ra = GETPC();
2232    int cc = 0;
2233
2234    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2235               __func__, l, a1, a2);
2236
2237    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2238        psw_as == AS_HOME || psw_as == AS_ACCREG) {
2239        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2240    }
2241
2242    if (!psw_key_valid(env, (key >> 4) & 0xf)) {
2243        s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2244    }
2245
2246    l = wrap_length32(env, l);
2247    if (l > 256) {
2248        /* max 256 */
2249        l = 256;
2250        cc = 3;
2251    } else if (!l) {
2252        return cc;
2253    }
2254    access_prepare(&srca, env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2255    access_prepare(&desta, env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2256    access_memmove(env, &desta, &srca, ra);
2257    return cc;
2258}
2259
2260void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2261{
2262    CPUState *cs = env_cpu(env);
2263    const uintptr_t ra = GETPC();
2264    uint64_t table, entry, raddr;
2265    uint16_t entries, i, index = 0;
2266
2267    if (r2 & 0xff000) {
2268        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2269    }
2270
2271    if (!(r2 & 0x800)) {
2272        /* invalidation-and-clearing operation */
2273        table = r1 & ASCE_ORIGIN;
2274        entries = (r2 & 0x7ff) + 1;
2275
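            /* The starting index is taken from the bits of r2 that would
               hold the corresponding region/segment index of a virtual
               address.  */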
2276        switch (r1 & ASCE_TYPE_MASK) {
2277        case ASCE_TYPE_REGION1:
2278            index = (r2 >> 53) & 0x7ff;
2279            break;
2280        case ASCE_TYPE_REGION2:
2281            index = (r2 >> 42) & 0x7ff;
2282            break;
2283        case ASCE_TYPE_REGION3:
2284            index = (r2 >> 31) & 0x7ff;
2285            break;
2286        case ASCE_TYPE_SEGMENT:
2287            index = (r2 >> 20) & 0x7ff;
2288            break;
2289        }
2290        for (i = 0; i < entries; i++) {
2291            /* addresses are not wrapped in 24/31-bit mode, but the table index is */
2292            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2293            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2294            if (!(entry & REGION_ENTRY_I)) {
2295                /* we are allowed to not store if already invalid */
2296                entry |= REGION_ENTRY_I;
2297                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2298            }
2299        }
2300    }
2301
2302    /* We simply flush the complete TLB, therefore we can ignore the r3 field. */
2303    if (m4 & 1) {
2304        tlb_flush(cs);
2305    } else {
2306        tlb_flush_all_cpus_synced(cs);
2307    }
2308}
2309
2310/* invalidate pte */
2311void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2312                  uint32_t m4)
2313{
2314    CPUState *cs = env_cpu(env);
2315    const uintptr_t ra = GETPC();
2316    uint64_t page = vaddr & TARGET_PAGE_MASK;
2317    uint64_t pte_addr, pte;
2318
2319    /* Compute the page table entry address */
2320    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2321    pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2322
2323    /* Mark the page table entry as invalid */
2324    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2325    pte |= PAGE_ENTRY_I;
2326    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2327
2328    /* XXX we exploit the fact that Linux passes the exact virtual
2329       address here - it's not obliged to! */
2330    if (m4 & 1) {
2331        if (vaddr & ~VADDR_PAGE_TX_MASK) {
2332            tlb_flush_page(cs, page);
2333            /* XXX 31-bit hack */
2334            tlb_flush_page(cs, page ^ 0x80000000);
2335        } else {
2336            /* looks like we don't have a valid virtual address */
2337            tlb_flush(cs);
2338        }
2339    } else {
2340        if (vaddr & ~VADDR_PAGE_TX_MASK) {
2341            tlb_flush_page_all_cpus_synced(cs, page);
2342            /* XXX 31-bit hack */
2343            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2344        } else {
2345            /* looks like we don't have a valid virtual address */
2346            tlb_flush_all_cpus_synced(cs);
2347        }
2348    }
2349}
2350
2351/* flush local tlb */
2352void HELPER(ptlb)(CPUS390XState *env)
2353{
2354    tlb_flush(env_cpu(env));
2355}
2356
2357/* flush global tlb */
2358void HELPER(purge)(CPUS390XState *env)
2359{
2360    tlb_flush_all_cpus_synced(env_cpu(env));
2361}
2362
2363/* load real address */
2364uint64_t HELPER(lra)(CPUS390XState *env, uint64_t r1, uint64_t addr)
2365{
2366    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2367    uint64_t ret, tec;
2368    int flags, exc, cc;
2369
2370    /* XXX incomplete - has more corner cases */
2371    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2372        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2373    }
2374
2375    exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2376    if (exc) {
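            /* On failure, the high half of r1 is preserved; the low half
               holds the exception code with bit 32 set.  */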
2377        cc = 3;
2378        ret = (r1 & 0xFFFFFFFF00000000ULL) | exc | 0x80000000;
2379    } else {
2380        cc = 0;
2381        ret |= addr & ~TARGET_PAGE_MASK;
2382    }
2383
2384    env->cc_op = cc;
2385    return ret;
2386}
2387#endif
2388
2389/* Execute instruction.  This instruction executes an insn modified with
2390   the contents of r1.  It does not change the executed instruction in memory;
2391   it does not change the program counter.
2392
2393   Perform this by recording the modified instruction in env->ex_value.
2394   This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2395*/
2396void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2397{
2398    uint64_t insn;
2399    uint8_t opc;
2400
2401    /* EXECUTE targets must be at even addresses.  */
2402    if (addr & 1) {
2403        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
2404    }
2405
2406    insn = cpu_lduw_code(env, addr);
2407    opc = insn >> 8;
2408
2409    /* Or in the contents of R1[56:63].  */
2410    insn |= r1 & 0xff;
2411
2412    /* Load the rest of the instruction.  */
2413    insn <<= 48;
2414    switch (get_ilen(opc)) {
2415    case 2:
2416        break;
2417    case 4:
2418        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2419        break;
2420    case 6:
2421        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2422        break;
2423    default:
2424        g_assert_not_reached();
2425    }
2426
2427    /* The most common cases can be sped up by avoiding a new TB.  */
2428    if ((opc & 0xf0) == 0xd0) {
2429        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2430                                      uint64_t, uintptr_t);
2431        static const dx_helper dx[16] = {
2432            [0x0] = do_helper_trt_bkwd,
2433            [0x2] = do_helper_mvc,
2434            [0x4] = do_helper_nc,
2435            [0x5] = do_helper_clc,
2436            [0x6] = do_helper_oc,
2437            [0x7] = do_helper_xc,
2438            [0xc] = do_helper_tr,
2439            [0xd] = do_helper_trt_fwd,
2440        };
2441        dx_helper helper = dx[opc & 0xf];
2442
2443        if (helper) {
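                /* Decode the SS-format fields; the insn was left-justified
                   into the high bits above, so the L field (byte 1) sits
                   at bits 48-55.  */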
2444            uint32_t l = extract64(insn, 48, 8);
2445            uint32_t b1 = extract64(insn, 44, 4);
2446            uint32_t d1 = extract64(insn, 32, 12);
2447            uint32_t b2 = extract64(insn, 28, 4);
2448            uint32_t d2 = extract64(insn, 16, 12);
2449            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2450            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2451
2452            env->cc_op = helper(env, l, a1, a2, 0);
2453            env->psw.addr += ilen;
2454            return;
2455        }
2456    } else if (opc == 0x0a) {
2457        env->int_svc_code = extract64(insn, 48, 8);
2458        env->int_svc_ilen = ilen;
2459        helper_exception(env, EXCP_SVC);
2460        g_assert_not_reached();
2461    }
2462
2463    /* Record the insn we want to execute as well as the ilen to use
2464       during the execution of the target insn.  This will also ensure
2465       that ex_value is non-zero, which flags that we are in a state
2466       that requires such execution.  */
2467    env->ex_value = insn | ilen;
2468    env->ex_target = addr;
2469}
2470
2471uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2472                       uint64_t len)
2473{
2474    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2475    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2476    const uint64_t r0 = env->regs[0];
2477    const uintptr_t ra = GETPC();
2478    uint8_t dest_key, dest_as, dest_k, dest_a;
2479    uint8_t src_key, src_as, src_k, src_a;
2480    uint64_t val;
2481    int cc = 0;
2482
2483    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2484               __func__, dest, src, len);
2485
2486    if (!(env->psw.mask & PSW_MASK_DAT)) {
2487        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2488    }
2489
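        /* Each 16-bit OAC is assumed to follow the PoO layout: bits 0-3
           key, bits 8-9 AS, bit 14 the key-validity bit (K) and bit 15
           the AS-validity bit (A); the extractions below match this.  */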
2490    /* OAC (operand access control) for the first operand -> dest */
2491    val = (r0 & 0xffff0000ULL) >> 16;
2492    dest_key = (val >> 12) & 0xf;
2493    dest_as = (val >> 6) & 0x3;
2494    dest_k = (val >> 1) & 0x1;
2495    dest_a = val & 0x1;
2496
2497    /* OAC (operand access control) for the second operand -> src */
2498    val = (r0 & 0x0000ffffULL);
2499    src_key = (val >> 12) & 0xf;
2500    src_as = (val >> 6) & 0x3;
2501    src_k = (val >> 1) & 0x1;
2502    src_a = val & 0x1;
2503
2504    if (!dest_k) {
2505        dest_key = psw_key;
2506    }
2507    if (!src_k) {
2508        src_key = psw_key;
2509    }
2510    if (!dest_a) {
2511        dest_as = psw_as;
2512    }
2513    if (!src_a) {
2514        src_as = psw_as;
2515    }
2516
2517    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2518        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2519    }
2520    if (!(env->cregs[0] & CR0_SECONDARY) &&
2521        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2522        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2523    }
2524    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2525        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2526    }
2527
2528    len = wrap_length32(env, len);
2529    if (len > 4096) {
2530        cc = 3;
2531        len = 4096;
2532    }
2533
2534    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2535    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2536        (env->psw.mask & PSW_MASK_PSTATE)) {
2537        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2538                      __func__);
2539        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2540    }
2541
2542    /* FIXME: Access using correct keys and AR-mode */
2543    if (len) {
2544        S390Access srca, desta;
2545
2546        access_prepare(&srca, env, src, len, MMU_DATA_LOAD,
2547                       mmu_idx_from_as(src_as), ra);
2548        access_prepare(&desta, env, dest, len, MMU_DATA_STORE,
2549                       mmu_idx_from_as(dest_as), ra);
2550
2551        access_memmove(env, &desta, &srca, ra);
2552    }
2553
2554    return cc;
2555}
2556
2557/* Decode a Unicode character.  A return value < 0 indicates success, storing
2558   the UTF-32 result into OCHAR and the input length into OLEN.  A return
2559   value >= 0 indicates failure, and the CC value to be returned.  */
2560typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2561                                 uint64_t ilen, bool enh_check, uintptr_t ra,
2562                                 uint32_t *ochar, uint32_t *olen);
2563
2564/* Encode a Unicode character.  A return value < 0 indicates success, storing
2565   the bytes into ADDR and the output length into OLEN.  A return value >= 0
2566   indicates failure, and the CC value to be returned.  */
2567typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2568                                 uint64_t ilen, uintptr_t ra, uint32_t c,
2569                                 uint32_t *olen);
2570
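    /* Worked example, for illustration: the sequence 0xc3 0xa9 takes the
       two-byte path, giving c = ((0xc3 & 0x1f) << 6) | (0xa9 & 0x3f) = 0xe9,
       i.e. U+00E9, with *olen = 2.  */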
2571static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2572                       bool enh_check, uintptr_t ra,
2573                       uint32_t *ochar, uint32_t *olen)
2574{
2575    uint8_t s0, s1, s2, s3;
2576    uint32_t c, l;
2577
2578    if (ilen < 1) {
2579        return 0;
2580    }
2581    s0 = cpu_ldub_data_ra(env, addr, ra);
2582    if (s0 <= 0x7f) {
2583        /* one byte character */
2584        l = 1;
2585        c = s0;
2586    } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2587        /* invalid character */
2588        return 2;
2589    } else if (s0 <= 0xdf) {
2590        /* two byte character */
2591        l = 2;
2592        if (ilen < 2) {
2593            return 0;
2594        }
2595        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2596        c = s0 & 0x1f;
2597        c = (c << 6) | (s1 & 0x3f);
2598        if (enh_check && (s1 & 0xc0) != 0x80) {
2599            return 2;
2600        }
2601    } else if (s0 <= 0xef) {
2602        /* three byte character */
2603        l = 3;
2604        if (ilen < 3) {
2605            return 0;
2606        }
2607        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2608        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2609        c = s0 & 0x0f;
2610        c = (c << 6) | (s1 & 0x3f);
2611        c = (c << 6) | (s2 & 0x3f);
2612        /* Fold the byte-by-byte range descriptions in the PoO into
2613           tests against the complete value.  It disallows overlong
2614           encodings (c < 0x800 for three bytes) and the UTF-16 surrogates.  */
2615        if (enh_check
2616            && ((s1 & 0xc0) != 0x80
2617                || (s2 & 0xc0) != 0x80
2618                || c < 0x800
2619                || (c >= 0xd800 && c <= 0xdfff))) {
2620            return 2;
2621        }
2622    } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2623        /* four byte character */
2624        l = 4;
2625        if (ilen < 4) {
2626            return 0;
2627        }
2628        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2629        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2630        s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2631        c = s0 & 0x07;
2632        c = (c << 6) | (s1 & 0x3f);
2633        c = (c << 6) | (s2 & 0x3f);
2634        c = (c << 6) | (s3 & 0x3f);
2635        /* See above.  */
2636        if (enh_check
2637            && ((s1 & 0xc0) != 0x80
2638                || (s2 & 0xc0) != 0x80
2639                || (s3 & 0xc0) != 0x80
2640                || c < 0x010000
2641                || c > 0x10ffff)) {
2642            return 2;
2643        }
2644    } else {
2645        /* invalid character */
2646        return 2;
2647    }
2648
2649    *ochar = c;
2650    *olen = l;
2651    return -1;
2652}
2653
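    /* Worked surrogate-pair example, for illustration: s0=0xd83d and
       s1=0xde00 give c = ((((0xd83d >> 6) & 0xf) + 1) << 6 | (0xd83d & 0x3f))
       << 10 | (0xde00 & 0x3ff) = 0x1f600, i.e. U+1F600.  */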
2654static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2655                        bool enh_check, uintptr_t ra,
2656                        uint32_t *ochar, uint32_t *olen)
2657{
2658    uint16_t s0, s1;
2659    uint32_t c, l;
2660
2661    if (ilen < 2) {
2662        return 0;
2663    }
2664    s0 = cpu_lduw_data_ra(env, addr, ra);
2665    if ((s0 & 0xfc00) != 0xd800) {
2666        /* one word character */
2667        l = 2;
2668        c = s0;
2669    } else {
2670        /* two word character */
2671        l = 4;
2672        if (ilen < 4) {
2673            return 0;
2674        }
2675        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2676        c = extract32(s0, 6, 4) + 1;
2677        c = (c << 6) | (s0 & 0x3f);
2678        c = (c << 10) | (s1 & 0x3ff);
2679        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2680            /* invalid surrogate character */
2681            return 2;
2682        }
2683    }
2684
2685    *ochar = c;
2686    *olen = l;
2687    return -1;
2688}
2689
2690static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2691                        bool enh_check, uintptr_t ra,
2692                        uint32_t *ochar, uint32_t *olen)
2693{
2694    uint32_t c;
2695
2696    if (ilen < 4) {
2697        return 0;
2698    }
2699    c = cpu_ldl_data_ra(env, addr, ra);
2700    if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2701        /* invalid unicode character */
2702        return 2;
2703    }
2704
2705    *ochar = c;
2706    *olen = 4;
2707    return -1;
2708}
2709
2710static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2711                       uintptr_t ra, uint32_t c, uint32_t *olen)
2712{
2713    uint8_t d[4];
2714    uint32_t l, i;
2715
2716    if (c <= 0x7f) {
2717        /* one byte character */
2718        l = 1;
2719        d[0] = c;
2720    } else if (c <= 0x7ff) {
2721        /* two byte character */
2722        l = 2;
2723        d[1] = 0x80 | extract32(c, 0, 6);
2724        d[0] = 0xc0 | extract32(c, 6, 5);
2725    } else if (c <= 0xffff) {
2726        /* three byte character */
2727        l = 3;
2728        d[2] = 0x80 | extract32(c, 0, 6);
2729        d[1] = 0x80 | extract32(c, 6, 6);
2730        d[0] = 0xe0 | extract32(c, 12, 4);
2731    } else {
2732        /* four byte character */
2733        l = 4;
2734        d[3] = 0x80 | extract32(c, 0, 6);
2735        d[2] = 0x80 | extract32(c, 6, 6);
2736        d[1] = 0x80 | extract32(c, 12, 6);
2737        d[0] = 0xf0 | extract32(c, 18, 3);
2738    }
2739
2740    if (ilen < l) {
2741        return 1;
2742    }
2743    for (i = 0; i < l; ++i) {
2744        cpu_stb_data_ra(env, addr + i, d[i], ra);
2745    }
2746
2747    *olen = l;
2748    return -1;
2749}
2750
2751static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2752                        uintptr_t ra, uint32_t c, uint32_t *olen)
2753{
2754    uint16_t d0, d1;
2755
2756    if (c <= 0xffff) {
2757        /* one word character */
2758        if (ilen < 2) {
2759            return 1;
2760        }
2761        cpu_stw_data_ra(env, addr, c, ra);
2762        *olen = 2;
2763    } else {
2764        /* two word character */
2765        if (ilen < 4) {
2766            return 1;
2767        }
2768        d1 = 0xdc00 | extract32(c, 0, 10);
2769        d0 = 0xd800 | extract32(c, 10, 6);
2770        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2771        cpu_stw_data_ra(env, addr + 0, d0, ra);
2772        cpu_stw_data_ra(env, addr + 2, d1, ra);
2773        *olen = 4;
2774    }
2775
2776    return -1;
2777}
2778
2779static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2780                        uintptr_t ra, uint32_t c, uint32_t *olen)
2781{
2782    if (ilen < 4) {
2783        return 1;
2784    }
2785    cpu_stl_data_ra(env, addr, c, ra);
2786    *olen = 4;
2787    return -1;
2788}
2789
2790static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2791                                       uint32_t r2, uint32_t m3, uintptr_t ra,
2792                                       decode_unicode_fn decode,
2793                                       encode_unicode_fn encode)
2794{
2795    uint64_t dst = get_address(env, r1);
2796    uint64_t dlen = get_length(env, r1 + 1);
2797    uint64_t src = get_address(env, r2);
2798    uint64_t slen = get_length(env, r2 + 1);
2799    bool enh_check = m3 & 1;
2800    int cc, i;
2801
2802    /* Lest we fail to service interrupts in a timely manner, limit the
2803       amount of work we're willing to do.  For now, let's cap at 256.  */
2804    for (i = 0; i < 256; ++i) {
2805        uint32_t c, ilen, olen;
2806
2807        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2808        if (unlikely(cc >= 0)) {
2809            break;
2810        }
2811        cc = encode(env, dst, dlen, ra, c, &olen);
2812        if (unlikely(cc >= 0)) {
2813            break;
2814        }
2815
2816        src += ilen;
2817        slen -= ilen;
2818        dst += olen;
2819        dlen -= olen;
2820        cc = 3;
2821    }
2822
2823    set_address(env, r1, dst);
2824    set_length(env, r1 + 1, dlen);
2825    set_address(env, r2, src);
2826    set_length(env, r2 + 1, slen);
2827
2828    return cc;
2829}
2830
2831uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2832{
2833    return convert_unicode(env, r1, r2, m3, GETPC(),
2834                           decode_utf8, encode_utf16);
2835}
2836
2837uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2838{
2839    return convert_unicode(env, r1, r2, m3, GETPC(),
2840                           decode_utf8, encode_utf32);
2841}
2842
2843uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2844{
2845    return convert_unicode(env, r1, r2, m3, GETPC(),
2846                           decode_utf16, encode_utf8);
2847}
2848
2849uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2850{
2851    return convert_unicode(env, r1, r2, m3, GETPC(),
2852                           decode_utf16, encode_utf32);
2853}
2854
2855uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2856{
2857    return convert_unicode(env, r1, r2, m3, GETPC(),
2858                           decode_utf32, encode_utf8);
2859}
2860
2861uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2862{
2863    return convert_unicode(env, r1, r2, m3, GETPC(),
2864                           decode_utf32, encode_utf16);
2865}
2866
2867void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2868                        uintptr_t ra)
2869{
2870    /* test the actual access, not just any access to the page due to LAP */
2871    while (len) {
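            /* -(addr | TARGET_PAGE_MASK) is the number of bytes from addr
               to the end of its page, so each probe stays within a single
               page.  */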
2872        const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2873        const uint64_t curlen = MIN(pagelen, len);
2874
2875        probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
2876        addr = wrap_address(env, addr + curlen);
2877        len -= curlen;
2878    }
2879}
2880
2881void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
2882{
2883    probe_write_access(env, addr, len, GETPC());
2884}
2885