qemu/target/s390x/mem_helper.c
/*
 *  S/390 memory access helper routines
 *
 *  Copyright (c) 2009 Ulrich Hecht
 *  Copyright (c) 2009 Alexander Graf
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "internal.h"
#include "tcg_s390x.h"
#include "exec/helper-proto.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "tcg/tcg.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/s390x/storage-keys.h"
#include "hw/boards.h"
#endif

/*****************************************************************************/
/* Softmmu support */

/* #define DEBUG_HELPER */
#ifdef DEBUG_HELPER
#define HELPER_LOG(x...) qemu_log(x)
#else
#define HELPER_LOG(x...)
#endif

static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
{
    uint16_t pkm = env->cregs[3] >> 16;

    if (env->psw.mask & PSW_MASK_PSTATE) {
        /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
        return pkm & (0x8000 >> psw_key);
    }
    return true;
}
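
/*
 * For example, with PKM = 0x4000 (only key 1 authorized) and psw_key = 1,
 * the test evaluates 0x4000 & (0x8000 >> 1) != 0, so the access is allowed
 * in problem state; any other key would be rejected.
 */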

static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
                                   uint64_t src, uint32_t len)
{
    if (!len || src == dest) {
        return false;
    }
    /* Take care of wrapping at the end of address space. */
    if (unlikely(wrap_address(env, src + len - 1) < src)) {
        return dest > src || dest <= wrap_address(env, src + len - 1);
    }
    return dest > src && dest <= src + len - 1;
}
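
/*
 * E.g., dest = 0x1002, src = 0x1000, len = 4: dest lies within
 * (src, src + len - 1], so the copy would overwrite source bytes before
 * they are read and the overlap is destructive. With dest = 0x1004
 * (== src + len) the ranges merely touch and false is returned.
 */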

/* Trigger a SPECIFICATION exception if an address or a length is not
   naturally aligned.  */
static inline void check_alignment(CPUS390XState *env, uint64_t v,
                                   int wordsize, uintptr_t ra)
{
    if (v % wordsize) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }
}

/* Load a value from memory according to its size.  */
static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
                                           int wordsize, uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        return cpu_ldub_data_ra(env, addr, ra);
    case 2:
        return cpu_lduw_data_ra(env, addr, ra);
    default:
        abort();
    }
}

/* Store a value to memory according to its size.  */
static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
                                      uint64_t value, int wordsize,
                                      uintptr_t ra)
{
    switch (wordsize) {
    case 1:
        cpu_stb_data_ra(env, addr, value, ra);
        break;
    case 2:
        cpu_stw_data_ra(env, addr, value, ra);
        break;
    default:
        abort();
    }
}

/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    target_ulong vaddr1;
    target_ulong vaddr2;
    char *haddr1;
    char *haddr2;
    uint16_t size1;
    uint16_t size2;
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers. Maybe one day we can get rid of ld/st access - once we can
     * handle TLB_NOTDIRTY differently. We don't expect these special accesses
     * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
     * pages might we trigger a new MMU translation - and it is very unlikely
     * that the mapping changes in between such that we would trigger a fault.
     */
    int mmu_idx;
} S390Access;

/*
 * With nonfault=1, return the PGM_ exception that would have been injected
 * into the guest; return 0 if no exception was detected.
 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 */
static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
                             MMUAccessType access_type, int mmu_idx,
                             bool nonfault, void **phost, uintptr_t ra)
{
    int flags;

#if defined(CONFIG_USER_ONLY)
    flags = page_get_flags(addr);
    if (!(flags & (access_type == MMU_DATA_LOAD ?  PAGE_READ : PAGE_WRITE_ORG))) {
        env->__excp_addr = addr;
        flags = (flags & PAGE_VALID) ? PGM_PROTECTION : PGM_ADDRESSING;
        if (nonfault) {
            return flags;
        }
        tcg_s390_program_interrupt(env, flags, ra);
    }
    *phost = g2h(env_cpu(env), addr);
#else
    /*
     * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
     * to detect if there was an exception during tlb_fill().
     */
    env->tlb_fill_exc = 0;
    flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
                               ra);
    if (env->tlb_fill_exc) {
        return env->tlb_fill_exc;
    }

    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
#endif
    return 0;
}

static int access_prepare_nf(S390Access *access, CPUS390XState *env,
                             bool nonfault, vaddr vaddr1, int size,
                             MMUAccessType access_type,
                             int mmu_idx, uintptr_t ra)
{
    void *haddr1, *haddr2 = NULL;
    int size1, size2, exc;
    vaddr vaddr2 = 0;

    assert(size > 0 && size <= 4096);

    size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK));
    size2 = size - size1;

    exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
                            &haddr1, ra);
    if (exc) {
        return exc;
    }
    if (unlikely(size2)) {
        /* The access crosses page boundaries. */
        vaddr2 = wrap_address(env, vaddr1 + size1);
        exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
                                nonfault, &haddr2, ra);
        if (exc) {
            return exc;
        }
    }

    *access = (S390Access) {
        .vaddr1 = vaddr1,
        .vaddr2 = vaddr2,
        .haddr1 = haddr1,
        .haddr2 = haddr2,
        .size1 = size1,
        .size2 = size2,
        .mmu_idx = mmu_idx
    };
    return 0;
}
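
/*
 * For example, with 4 KiB pages, a 256-byte access at vaddr1 = 0x1f80 yields
 * size1 = MIN(0x100, -(0x1f80 | ~0xfff)) = 0x80 (the bytes up to the page
 * end) and size2 = 0x80 on the following page, so both pages are probed
 * before any data is touched.
 */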

static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
                                 MMUAccessType access_type, int mmu_idx,
                                 uintptr_t ra)
{
    S390Access ret;
    int exc = access_prepare_nf(&ret, env, false, vaddr, size,
                                access_type, mmu_idx, ra);
    assert(!exc);
    return ret;
}

/* Helper to handle memset on a single page. */
static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
                             uint8_t byte, uint16_t size, int mmu_idx,
                             uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    g_assert(haddr);
    memset(haddr, byte, size);
#else
    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    int i;

    if (likely(haddr)) {
        memset(haddr, byte, size);
    } else {
        /*
         * Do a single access and test if we can then get access to the
         * page. This is especially relevant to speed up TLB_NOTDIRTY.
         */
        g_assert(size > 0);
        helper_ret_stb_mmu(env, vaddr, byte, oi, ra);
        haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
        if (likely(haddr)) {
            memset(haddr + 1, byte, size - 1);
        } else {
            for (i = 1; i < size; i++) {
                helper_ret_stb_mmu(env, vaddr + i, byte, oi, ra);
            }
        }
    }
#endif
}

static void access_memset(CPUS390XState *env, S390Access *desta,
                          uint8_t byte, uintptr_t ra)
{
    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
                     desta->mmu_idx, ra);
    if (likely(!desta->size2)) {
        return;
    }
    do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
                     desta->mmu_idx, ra);
}

static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                                  int offset, int mmu_idx, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return ldub_p(*haddr + offset);
#else
    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    uint8_t byte;

    if (likely(*haddr)) {
        return ldub_p(*haddr + offset);
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    byte = helper_ret_ldub_mmu(env, vaddr + offset, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
    return byte;
#endif
}

static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
                               int offset, uintptr_t ra)
{
    if (offset < access->size1) {
        return do_access_get_byte(env, access->vaddr1, &access->haddr1,
                                  offset, access->mmu_idx, ra);
    }
    return do_access_get_byte(env, access->vaddr2, &access->haddr2,
                              offset - access->size1, access->mmu_idx, ra);
}

static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                               int offset, uint8_t byte, int mmu_idx,
                               uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    stb_p(*haddr + offset, byte);
#else
    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

    if (likely(*haddr)) {
        stb_p(*haddr + offset, byte);
        return;
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    helper_ret_stb_mmu(env, vaddr + offset, byte, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
#endif
}

static void access_set_byte(CPUS390XState *env, S390Access *access,
                            int offset, uint8_t byte, uintptr_t ra)
{
    if (offset < access->size1) {
        do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
                           access->mmu_idx, ra);
    } else {
        do_access_set_byte(env, access->vaddr2, &access->haddr2,
                           offset - access->size1, byte, access->mmu_idx, ra);
    }
}

/*
 * Move data with the same semantics as memmove() in case ranges don't overlap
 * or src > dest. Undefined behavior on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int diff;

    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);

    /* Fall back to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        for (i = 0; i < desta->size1 + desta->size2; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

    if (srca->size1 == desta->size1) {
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (srca->size1 < desta->size1) {
        diff = desta->size1 - srca->size1;
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        diff = srca->size1 - desta->size1;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}
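
/*
 * In the mixed cases above, the page split points of source and destination
 * differ: e.g. if srca->size1 < desta->size1, the first destination page is
 * filled from all of srca->haddr1 plus the first `diff` bytes of
 * srca->haddr2, and the second destination page gets the rest.
 */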

static int mmu_idx_from_as(uint8_t as)
{
    switch (as) {
    case AS_PRIMARY:
        return MMU_PRIMARY_IDX;
    case AS_SECONDARY:
        return MMU_SECONDARY_IDX;
    case AS_HOME:
        return MMU_HOME_IDX;
    default:
        /* FIXME AS_ACCREG */
        g_assert_not_reached();
    }
}

/* and on array */
static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* NC always processes one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) &
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}
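
/*
 * The same length convention applies to XC, OC and MVC below: the encoded
 * 8-bit length is one less than the number of bytes processed, so l = 0
 * touches exactly 1 byte and l = 255 touches 256. The value returned here
 * becomes the CC: 0 if all result bytes were zero, 1 otherwise.
 */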

uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}

/* xor on array */
static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* XC always processes one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /* xor with itself is the same as memset(0) */
    if (src == dest) {
        access_memset(env, &desta, 0, ra);
        return 0;
    }

    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}

/* or on array */
static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* OC always processes one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) |
                          access_get_byte(env, &srca2, i, ra);

        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}

uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}

/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
    }

    return env->cc_op;
}

void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}
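
/*
 * The dest == src + 1 special case above is the classic MVC memset idiom:
 * because the operands are processed byte by byte, each store feeds the
 * next load, so the single byte at src is propagated through the entire
 * destination field.
 */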

/* move inverse  */
void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVCIN always copies one more byte than specified - maximum is 256 */
    l++;

    src = wrap_address(env, src - l + 1);
    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* move numerics  */
void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVN always copies one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
                          (access_get_byte(env, &srca2, i, ra) & 0xf0);

        access_set_byte(env, &desta, i, x, ra);
    }
}
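
/*
 * E.g., a source byte 0x39 and a destination byte 0xc5 combine to 0xc9:
 * MVN moves only the numeric (low) nibble and leaves the destination's
 * zone (high) nibble intact.
 */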

/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}
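
/*
 * Worked example: src = {0x12, 0x34} (len_src = 2) and a three-byte dest
 * ending in 0x.5 (len_dest = 3). The rightmost byte keeps the destination's
 * low nibble, giving 0x45; the remaining digits shift left by one nibble,
 * so dest becomes {0x01, 0x23, 0x45}.
 */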

/* move zones  */
void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVZ always copies one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
                          (access_get_byte(env, &srca2, i, ra) & 0x0f);

        access_set_byte(env, &desta, i, x, ra);
    }
}

/* compare unsigned byte arrays */
static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
                              uint64_t s2, uintptr_t ra)
{
    uint32_t i;
    uint32_t cc = 0;

    HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
               __func__, l, s1, s2);

    for (i = 0; i <= l; i++) {
        uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
        uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
        HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
        if (x < y) {
            cc = 1;
            break;
        } else if (x > y) {
            cc = 2;
            break;
        }
    }

    HELPER_LOG("\n");
    return cc;
}

uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}

/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            addr++;
        }
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}
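
/*
 * E.g., mask = 0xa (binary 1010) compares bytes 0 and 2 of r1 (counting
 * from the left) with two successive bytes at addr; masked-out bytes
 * neither advance addr nor participate in the comparison.
 */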

static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}

/*
 * Store the address to the given register, zeroing out unused leftmost
 * bits in bit positions 32-63 (24-bit and 31-bit mode only).
 */
static inline void set_address_zero(CPUS390XState *env, int reg,
                                    uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            address &= 0x00ffffff;
        } else {
            address &= 0x7fffffff;
        }
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
    }
}

static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}

static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        return (uint32_t)length;
    }
    return length;
}

static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
{
    if (!(env->psw.mask & PSW_MASK_64)) {
        /* 24-Bit and 31-Bit mode */
        length &= 0x7fffffff;
    }
    return length;
}

static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}

static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = length;
    } else {
        /* 24-Bit and 31-Bit mode */
        env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
    }
}

/* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];

    /* Bits 32-55 of R0 must be zero.  */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}

/* unsigned string compare (c is string terminator) */
uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                env->retxl = s2;
                return s1;
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            env->retxl = s2 + len;
            return s1 + len;
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    env->retxl = s2 + len;
    return s1 + len;
}

/* move page */
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
{
    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
    const int mmu_idx = cpu_mmu_index(env, false);
    const bool f = extract64(r0, 11, 1);
    const bool s = extract64(r0, 10, 1);
    const bool cco = extract64(r0, 8, 1);
    uintptr_t ra = GETPC();
    S390Access srca, desta;
    int exc;

    if ((f && s) || extract64(r0, 12, 4)) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
    }

    /*
     * We always manually handle exceptions such that we can properly store
     * r1/r2 to the lowcore on page-translation exceptions.
     *
     * TODO: Access key handling
     */
    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
                            MMU_DATA_LOAD, mmu_idx, ra);
    if (exc) {
        if (cco) {
            return 2;
        }
        goto inject_exc;
    }
    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
                            MMU_DATA_STORE, mmu_idx, ra);
    if (exc) {
        if (cco && exc != PGM_PROTECTION) {
            return 1;
        }
        goto inject_exc;
    }
    access_memmove(env, &desta, &srca, ra);
    return 0; /* data moved */
inject_exc:
#if !defined(CONFIG_USER_ONLY)
    if (exc != PGM_ADDRESSING) {
        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
                 env->tlb_fill_tec);
    }
    if (exc == PGM_PAGE_TRANS) {
        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
                 r1 << 4 | r2);
    }
#endif
    tcg_s390_program_interrupt(env, exc, ra);
}

/* string copy */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    const uint8_t c = env->regs[0];
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}

/* load access registers r1 to r3 from memory at a2 */
void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* store access registers r1 to r3 in memory at a2 */
void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    int i;

    if (a2 & 0x3) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    for (i = r1;; i = (i + 1) % 16) {
        cpu_stl_data_ra(env, a2, env->aregs[i], ra);
        a2 += 4;

        if (i == r3) {
            break;
        }
    }
}

/* move long helper */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        *dest = wrap_address(env, *dest + len);
    }

    return *destlen ? 3 : cc;
}
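
/*
 * The parity trick above keeps a two-byte pad pattern aligned to the end of
 * the field: e.g. with pad = 0xaabb and *destlen = 4, the bytes stored are
 * 0xaa, 0xbb, 0xaa, 0xbb (high byte while the remaining length is even,
 * low byte while it is odd).
 */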

/* move long */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memset(env, &desta, pad, ra);
        } else {
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
                                  ra);
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}

/* move long extended */
uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* move long unicode */
uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = get_length(env, r1 + 1);
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = get_length(env, r3 + 1);
    uint64_t src = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);

    set_length(env, r1 + 1, destlen);
    set_length(env, r3 + 1, srclen);
    set_address(env, r1, dest);
    set_address(env, r3, src);

    return cc;
}

/* compare logical long helper */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}

/* compare logical long */
uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
    uint64_t src3 = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);

    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
    set_address(env, r1, src1);
    set_address(env, r2, src3);

    return cc;
}

/* compare logical long extended memcompare insn with padding */
uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint8_t pad = a2;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* compare logical long unicode memcompare insn with padding */
uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
                       uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t src1len = get_length(env, r1 + 1);
    uint64_t src1 = get_address(env, r1);
    uint64_t src3len = get_length(env, r3 + 1);
    uint64_t src3 = get_address(env, r3);
    uint16_t pad = a2;
    uint32_t cc;

    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);

    set_length(env, r1 + 1, src1len);
    set_length(env, r3 + 1, src3len);
    set_address(env, r1, src1);
    set_address(env, r3, src3);

    return cc;
}

/* checksum */
uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                      uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    env->retxl = cksm;
    return len;
}
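
/*
 * Folding example: cksm = 0x123456789 becomes 0x23456789 + 0x1 =
 * 0x2345678a; a second iteration is only needed if that addition itself
 * carries out of 32 bits.
 */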

void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
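
/*
 * Worked example: zoned src = {0xf1, 0xf2, 0xc3} (len_src = 2) and a
 * two-byte dest (len_dest = 1) pack to {0x12, 0x3c}: the last byte swaps
 * digit and sign nibbles, then the remaining digits are packed two per
 * byte from right to left.
 */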

static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}

void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 1, GETPC());
}

void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 2, GETPC());
}

void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    int second_nibble = 0;

    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}

static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}

uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 1, src, GETPC());
}

uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 2, src, GETPC());
}

uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;
    int i;

    for (i = 0; i < destlen; i++) {
        uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
        /* digit */
        cc |= (b & 0xf0) > 0x90 ? 2 : 0;

        if (i == (destlen - 1)) {
            /* sign */
            cc |= (b & 0xf) < 0xa ? 1 : 0;
        } else {
            /* digit */
            cc |= (b & 0xf) > 0x9 ? 2 : 0;
        }
    }

    return cc;
}

static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
                             uint64_t trans, uintptr_t ra)
{
    uint32_t i;

    for (i = 0; i <= len; i++) {
        uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
        uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    return env->cc_op;
}

void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
                uint64_t trans)
{
    do_helper_tr(env, len, array, trans, GETPC());
}

uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
                     uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    env->retxl = len - i;
    return array + i;
}
1669
1670static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1671                                     uint64_t array, uint64_t trans,
1672                                     int inc, uintptr_t ra)
1673{
1674    int i;
1675
1676    for (i = 0; i <= len; i++) {
1677        uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1678        uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1679
1680        if (sbyte != 0) {
1681            set_address(env, 1, array + i * inc);
1682            env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1683            return (i == len) ? 2 : 1;
1684        }
1685    }
1686
1687    return 0;
1688}
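
/*
 * Editor's note: TRT is, in effect, a table-driven strcspn.  A hedged
 * sketch of how a guest might fill the 256-byte function table so that
 * the scan above stops on NUL or '\n'; hypothetical, illustration only.
 */
static inline void trt_stop_table_sketch(uint8_t table[256])
{
    memset(table, 0, 256);  /* function byte 0: keep scanning */
    table['\0'] = 1;        /* nonzero function byte: stop, cc != 0 */
    table['\n'] = 2;
}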
1689
1690static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1691                                  uint64_t array, uint64_t trans,
1692                                  uintptr_t ra)
1693{
1694    return do_helper_trt(env, len, array, trans, 1, ra);
1695}
1696
1697uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1698                     uint64_t trans)
1699{
1700    return do_helper_trt(env, len, array, trans, 1, GETPC());
1701}
1702
1703static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1704                                   uint64_t array, uint64_t trans,
1705                                   uintptr_t ra)
1706{
1707    return do_helper_trt(env, len, array, trans, -1, ra);
1708}
1709
1710uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1711                      uint64_t trans)
1712{
1713    return do_helper_trt(env, len, array, trans, -1, GETPC());
1714}
1715
1716/* Translate one/two to one/two */
1717uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1718                      uint32_t tst, uint32_t sizes)
1719{
1720    uintptr_t ra = GETPC();
1721    int dsize = (sizes & 1) ? 1 : 2;
1722    int ssize = (sizes & 2) ? 1 : 2;
1723    uint64_t tbl = get_address(env, 1);
1724    uint64_t dst = get_address(env, r1);
1725    uint64_t len = get_length(env, r1 + 1);
1726    uint64_t src = get_address(env, r2);
1727    uint32_t cc = 3;
1728    int i;
1729
1730    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1731       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1732       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1733    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1734        tbl &= -4096;
1735    } else {
1736        tbl &= -8;
1737    }
1738
1739    check_alignment(env, len, ssize, ra);
1740
1741    /* Lest we fail to service interrupts in a timely manner, limit
1742       the amount of work we're willing to do.  */
1743    for (i = 0; i < 0x2000; i++) {
1744        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1745        uint64_t tble = tbl + (sval * dsize);
1746        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1747        if (dval == tst) {
1748            cc = 1;
1749            break;
1750        }
1751        cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1752
1753        len -= ssize;
1754        src += ssize;
1755        dst += dsize;
1756
1757        if (len == 0) {
1758            cc = 0;
1759            break;
1760        }
1761    }
1762
1763    set_address(env, r1, dst);
1764    set_length(env, r1 + 1, len);
1765    set_address(env, r2, src);
1766
1767    return cc;
1768}
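
/*
 * Editor's note: cc summary for the TRxx loop above: cc 0 when the
 * second operand was processed completely, cc 1 when a looked-up value
 * equalled the test character, and cc 3 when the 0x2000-iteration cap
 * expired first (the updated registers allow re-execution).
 */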
1769
1770void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
1771                  uint32_t r1, uint32_t r3)
1772{
1773    uintptr_t ra = GETPC();
1774    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1775    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1776    Int128 oldv;
1777    uint64_t oldh, oldl;
1778    bool fail;
1779
1780    check_alignment(env, addr, 16, ra);
1781
1782    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
1783    oldl = cpu_ldq_data_ra(env, addr + 8, ra);
1784
1785    oldv = int128_make128(oldl, oldh);
1786    fail = !int128_eq(oldv, cmpv);
1787    if (fail) {
1788        newv = oldv;
1789    }
1790
1791    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
1792    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
1793
1794    env->cc_op = fail;
1795    env->regs[r1] = int128_gethi(oldv);
1796    env->regs[r1 + 1] = int128_getlo(oldv);
1797}
1798
1799void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
1800                           uint32_t r1, uint32_t r3)
1801{
1802    uintptr_t ra = GETPC();
1803    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1804    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1805    int mem_idx;
1806    TCGMemOpIdx oi;
1807    Int128 oldv;
1808    bool fail;
1809
1810    assert(HAVE_CMPXCHG128);
1811
1812    mem_idx = cpu_mmu_index(env, false);
1813    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1814    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
1815    fail = !int128_eq(oldv, cmpv);
1816
1817    env->cc_op = fail;
1818    env->regs[r1] = int128_gethi(oldv);
1819    env->regs[r1 + 1] = int128_getlo(oldv);
1820}
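
/*
 * Editor's note: in both CDSG variants above, the even register of the
 * pair holds the most-significant doubleword, hence the argument order
 * int128_make128(env->regs[r1 + 1], env->regs[r1]) = (low, high).
 */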
1821
1822static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1823                        uint64_t a2, bool parallel)
1824{
1825    uint32_t mem_idx = cpu_mmu_index(env, false);
1826    uintptr_t ra = GETPC();
1827    uint32_t fc = extract32(env->regs[0], 0, 8);
1828    uint32_t sc = extract32(env->regs[0], 8, 8);
1829    uint64_t pl = get_address(env, 1) & -16;
1830    uint64_t svh, svl;
1831    uint32_t cc;
1832
1833    /* Sanity check the function code and storage characteristic.  */
1834    if (fc > 1 || sc > 3) {
1835        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1836            goto spec_exception;
1837        }
1838        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1839            goto spec_exception;
1840        }
1841    }
1842
1843    /* Sanity check the alignments.  */
1844    if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1845        goto spec_exception;
1846    }
1847
1848    /* Sanity check writability of the store address.  */
1849    probe_write(env, a2, 1 << sc, mem_idx, ra);
1850
1851    /*
1852     * Note that the compare-and-swap is atomic, and the store is atomic,
1853     * but the complete operation is not.  Therefore we do not need to
1854     * assert serial context in order to implement this.  That said,
1855     * restart early if we can't support either operation that is supposed
1856     * to be atomic.
1857     */
1858    if (parallel) {
1859        uint32_t max = 2;
1860#ifdef CONFIG_ATOMIC64
1861        max = 3;
1862#endif
1863        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1864            (HAVE_ATOMIC128  ? 0 : sc > max)) {
1865            cpu_loop_exit_atomic(env_cpu(env), ra);
1866        }
1867    }
1868
1869    /* All loads happen before all stores.  For simplicity, load the entire
1870       store value area from the parameter list.  */
1871    svh = cpu_ldq_data_ra(env, pl + 16, ra);
1872    svl = cpu_ldq_data_ra(env, pl + 24, ra);
1873
1874    switch (fc) {
1875    case 0:
1876        {
1877            uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1878            uint32_t cv = env->regs[r3];
1879            uint32_t ov;
1880
1881            if (parallel) {
1882#ifdef CONFIG_USER_ONLY
1883                uint32_t *haddr = g2h(env_cpu(env), a1);
1884                ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1885#else
1886                TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1887                ov = helper_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1888#endif
1889            } else {
1890                ov = cpu_ldl_data_ra(env, a1, ra);
1891                cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1892            }
1893            cc = (ov != cv);
1894            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1895        }
1896        break;
1897
1898    case 1:
1899        {
1900            uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1901            uint64_t cv = env->regs[r3];
1902            uint64_t ov;
1903
1904            if (parallel) {
1905#ifdef CONFIG_ATOMIC64
1906# ifdef CONFIG_USER_ONLY
1907                uint64_t *haddr = g2h(env_cpu(env), a1);
1908                ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
1909# else
1910                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
1911                ov = helper_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1912# endif
1913#else
1914                /* Note that we asserted !parallel above.  */
1915                g_assert_not_reached();
1916#endif
1917            } else {
1918                ov = cpu_ldq_data_ra(env, a1, ra);
1919                cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1920            }
1921            cc = (ov != cv);
1922            env->regs[r3] = ov;
1923        }
1924        break;
1925
1926    case 2:
1927        {
1928            uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1929            uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1930            Int128 nv = int128_make128(nvl, nvh);
1931            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1932            Int128 ov;
1933
1934            if (!parallel) {
1935                uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1936                uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1937
1938                ov = int128_make128(ol, oh);
1939                cc = !int128_eq(ov, cv);
1940                if (cc) {
1941                    nv = ov;
1942                }
1943
1944                cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1945                cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1946            } else if (HAVE_CMPXCHG128) {
1947                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1948                ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1949                cc = !int128_eq(ov, cv);
1950            } else {
1951                /* Note that we asserted !parallel above.  */
1952                g_assert_not_reached();
1953            }
1954
1955            env->regs[r3 + 0] = int128_gethi(ov);
1956            env->regs[r3 + 1] = int128_getlo(ov);
1957        }
1958        break;
1959
1960    default:
1961        g_assert_not_reached();
1962    }
1963
1964    /* Store only if the comparison succeeded.  Note that above we use a pair
1965       of 64-bit big-endian loads, so for sc < 3 we must extract the value
1966       from the most-significant bits of svh.  */
1967    if (cc == 0) {
1968        switch (sc) {
1969        case 0:
1970            cpu_stb_data_ra(env, a2, svh >> 56, ra);
1971            break;
1972        case 1:
1973            cpu_stw_data_ra(env, a2, svh >> 48, ra);
1974            break;
1975        case 2:
1976            cpu_stl_data_ra(env, a2, svh >> 32, ra);
1977            break;
1978        case 3:
1979            cpu_stq_data_ra(env, a2, svh, ra);
1980            break;
1981        case 4:
1982            if (!parallel) {
1983                cpu_stq_data_ra(env, a2 + 0, svh, ra);
1984                cpu_stq_data_ra(env, a2 + 8, svl, ra);
1985            } else if (HAVE_ATOMIC128) {
1986                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1987                Int128 sv = int128_make128(svl, svh);
1988                helper_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1989            } else {
1990                /* Note that we asserted !parallel above.  */
1991                g_assert_not_reached();
1992            }
1993            break;
1994        default:
1995            g_assert_not_reached();
1996        }
1997    }
1998
1999    return cc;
2000
2001 spec_exception:
2002    tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2003}
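
/*
 * Editor's note: layout of the CSST parameter list addressed via GR1
 * (masked to a 16-byte boundary above), as consumed by do_csst:
 *   offset  0  replacement value for the compare-and-swap (4/8/16
 *              bytes, selected by fc)
 *   offset 16  store value, high doubleword (svh)
 *   offset 24  store value, low doubleword (svl)
 * For sc < 3 the store value is taken from the leftmost bytes of svh,
 * matching the big-endian shifts in the store switch above.
 */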
2004
2005uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
2006{
2007    return do_csst(env, r3, a1, a2, false);
2008}
2009
2010uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
2011                               uint64_t a2)
2012{
2013    return do_csst(env, r3, a1, a2, true);
2014}
2015
2016#if !defined(CONFIG_USER_ONLY)
2017void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2018{
2019    uintptr_t ra = GETPC();
2020    bool PERchanged = false;
2021    uint64_t src = a2;
2022    uint32_t i;
2023
2024    if (src & 0x7) {
2025        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2026    }
2027
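    /*
     * Editor's note: the register range wraps modulo 16; e.g. r1 = 13,
     * r3 = 2 loads control registers 13, 14, 15, 0, 1 and 2.
     */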
2028    for (i = r1;; i = (i + 1) % 16) {
2029        uint64_t val = cpu_ldq_data_ra(env, src, ra);
2030        if (env->cregs[i] != val && i >= 9 && i <= 11) {
2031            PERchanged = true;
2032        }
2033        env->cregs[i] = val;
2034        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
2035                   i, src, val);
2036        src += sizeof(uint64_t);
2037
2038        if (i == r3) {
2039            break;
2040        }
2041    }
2042
2043    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2044        s390_cpu_recompute_watchpoints(env_cpu(env));
2045    }
2046
2047    tlb_flush(env_cpu(env));
2048}
2049
2050void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2051{
2052    uintptr_t ra = GETPC();
2053    bool PERchanged = false;
2054    uint64_t src = a2;
2055    uint32_t i;
2056
2057    if (src & 0x3) {
2058        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2059    }
2060
2061    for (i = r1;; i = (i + 1) % 16) {
2062        uint32_t val = cpu_ldl_data_ra(env, src, ra);
2063        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
2064            PERchanged = true;
2065        }
2066        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
2067        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
2068        src += sizeof(uint32_t);
2069
2070        if (i == r3) {
2071            break;
2072        }
2073    }
2074
2075    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2076        s390_cpu_recompute_watchpoints(env_cpu(env));
2077    }
2078
2079    tlb_flush(env_cpu(env));
2080}
2081
2082void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2083{
2084    uintptr_t ra = GETPC();
2085    uint64_t dest = a2;
2086    uint32_t i;
2087
2088    if (dest & 0x7) {
2089        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2090    }
2091
2092    for (i = r1;; i = (i + 1) % 16) {
2093        cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2094        dest += sizeof(uint64_t);
2095
2096        if (i == r3) {
2097            break;
2098        }
2099    }
2100}
2101
2102void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2103{
2104    uintptr_t ra = GETPC();
2105    uint64_t dest = a2;
2106    uint32_t i;
2107
2108    if (dest & 0x3) {
2109        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2110    }
2111
2112    for (i = r1;; i = (i + 1) % 16) {
2113        cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2114        dest += sizeof(uint32_t);
2115
2116        if (i == r3) {
2117            break;
2118        }
2119    }
2120}
2121
2122uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2123{
2124    uintptr_t ra = GETPC();
2125    int i;
2126
2127    real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2128
2129    for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2130        cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2131    }
2132
2133    return 0;
2134}
2135
2136uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2137{
2138    S390CPU *cpu = env_archcpu(env);
2139    CPUState *cs = env_cpu(env);
2140
2141    /*
2142     * TODO: we currently don't handle all access protection types
2143     * (including access-list and key-controlled) as well as AR mode.
2144     */
2145    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2146        /* Fetching permitted; storing permitted */
2147        return 0;
2148    }
2149
2150    if (env->int_pgm_code == PGM_PROTECTION) {
2151        /* retry if reading is possible */
2152        cs->exception_index = -1;
2153        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2154            /* Fetching permitted; storing not permitted */
2155            return 1;
2156        }
2157    }
2158
2159    switch (env->int_pgm_code) {
2160    case PGM_PROTECTION:
2161        /* Fetching not permitted; storing not permitted */
2162        cs->exception_index = -1;
2163        return 2;
2164    case PGM_ADDRESSING:
2165    case PGM_TRANS_SPEC:
2166        /* exceptions forwarded to the guest */
2167        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2168        return 0;
2169    }
2170
2171    /* Translation not available */
2172    cs->exception_index = -1;
2173    return 3;
2174}
2175
2176/* insert storage key extended */
2177uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2178{
2179    MachineState *ms = MACHINE(qdev_get_machine());
2180    static S390SKeysState *ss;
2181    static S390SKeysClass *skeyclass;
2182    uint64_t addr = wrap_address(env, r2);
2183    uint8_t key;
2184
2185    if (addr > ms->ram_size) {
2186        return 0;
2187    }
2188
2189    if (unlikely(!ss)) {
2190        ss = s390_get_skeys_device();
2191        skeyclass = S390_SKEYS_GET_CLASS(ss);
2192    }
2193
2194    if (skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key)) {
2195        return 0;
2196    }
2197    return key;
2198}
2199
2200/* set storage key extended */
2201void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2202{
2203    MachineState *ms = MACHINE(qdev_get_machine());
2204    static S390SKeysState *ss;
2205    static S390SKeysClass *skeyclass;
2206    uint64_t addr = wrap_address(env, r2);
2207    uint8_t key;
2208
2209    if (addr > ms->ram_size) {
2210        return;
2211    }
2212
2213    if (unlikely(!ss)) {
2214        ss = s390_get_skeys_device();
2215        skeyclass = S390_SKEYS_GET_CLASS(ss);
2216    }
2217
2218    key = (uint8_t) r1;
2219    skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2220    /*
2221     * As we can only flush by virtual address and not all the entries
2222     * that point to a physical address, we have to flush the whole TLB.
2223     */
2224    tlb_flush_all_cpus_synced(env_cpu(env));
2225}
2226
2227/* reset reference bit extended */
2228uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2229{
2230    MachineState *ms = MACHINE(qdev_get_machine());
2231    static S390SKeysState *ss;
2232    static S390SKeysClass *skeyclass;
2233    uint8_t re, key;
2234
2235    if (r2 > ms->ram_size) {
2236        return 0;
2237    }
2238
2239    if (unlikely(!ss)) {
2240        ss = s390_get_skeys_device();
2241        skeyclass = S390_SKEYS_GET_CLASS(ss);
2242    }
2243
2244    if (skeyclass->get_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
2245        return 0;
2246    }
2247
2248    re = key & (SK_R | SK_C);
2249    key &= ~SK_R;
2250
2251    if (skeyclass->set_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
2252        return 0;
2253    }
2254    /*
2255     * As we can only flush by virtual address and not all the entries
2256     * that point to a physical address, we have to flush the whole TLB.
2257     */
2258    tlb_flush_all_cpus_synced(env_cpu(env));
2259
2260    /*
2261     * cc
2262     *
2263     * 0  Reference bit zero; change bit zero
2264     * 1  Reference bit zero; change bit one
2265     * 2  Reference bit one; change bit zero
2266     * 3  Reference bit one; change bit one
2267     */
2268
2269    return re >> 1;
2270}
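
/*
 * Editor's note: the mapping works because SK_R and SK_C occupy bits
 * 2 and 1 of the storage key, so "re >> 1" places the (reference,
 * change) pair in bits 1:0, yielding exactly the cc table above.
 */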
2271
2272uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2273{
2274    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2275    S390Access srca, desta;
2276    uintptr_t ra = GETPC();
2277    int cc = 0;
2278
2279    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2280               __func__, l, a1, a2);
2281
2282    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2283        psw_as == AS_HOME || psw_as == AS_ACCREG) {
2284        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2285    }
2286
2287    l = wrap_length32(env, l);
2288    if (l > 256) {
2289        /* max 256 */
2290        l = 256;
2291        cc = 3;
2292    } else if (!l) {
2293        return cc;
2294    }
2295
2296    /* TODO: Access key handling */
2297    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2298    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2299    access_memmove(env, &desta, &srca, ra);
2300    return cc;
2301}
2302
2303uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2304{
2305    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2306    S390Access srca, desta;
2307    uintptr_t ra = GETPC();
2308    int cc = 0;
2309
2310    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2311               __func__, l, a1, a2);
2312
2313    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2314        psw_as == AS_HOME || psw_as == AS_ACCREG) {
2315        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2316    }
2317
2318    l = wrap_length32(env, l);
2319    if (l > 256) {
2320        /* max 256 */
2321        l = 256;
2322        cc = 3;
2323    } else if (!l) {
2324        return cc;
2325    }
2326
2327    /* TODO: Access key handling */
2328    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2329    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2330    access_memmove(env, &desta, &srca, ra);
2331    return cc;
2332}
2333
2334void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2335{
2336    CPUState *cs = env_cpu(env);
2337    const uintptr_t ra = GETPC();
2338    uint64_t table, entry, raddr;
2339    uint16_t entries, i, index = 0;
2340
2341    if (r2 & 0xff000) {
2342        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2343    }
2344
2345    if (!(r2 & 0x800)) {
2346        /* invalidation-and-clearing operation */
2347        table = r1 & ASCE_ORIGIN;
2348        entries = (r2 & 0x7ff) + 1;
2349
2350        switch (r1 & ASCE_TYPE_MASK) {
2351        case ASCE_TYPE_REGION1:
2352            index = (r2 >> 53) & 0x7ff;
2353            break;
2354        case ASCE_TYPE_REGION2:
2355            index = (r2 >> 42) & 0x7ff;
2356            break;
2357        case ASCE_TYPE_REGION3:
2358            index = (r2 >> 31) & 0x7ff;
2359            break;
2360        case ASCE_TYPE_SEGMENT:
2361            index = (r2 >> 20) & 0x7ff;
2362            break;
2363        }
2364        for (i = 0; i < entries; i++) {
2365            /* addresses are not wrapped in 24/31-bit mode, but the table index is */
2366            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2367            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2368            if (!(entry & REGION_ENTRY_I)) {
2369                /* we may skip the store if the entry is already invalid */
2370                entry |= REGION_ENTRY_I;
2371                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2372            }
2373        }
2374    }
2375
2376    /* We simply flush the complete TLB, so we can ignore r3. */
2377    if (m4 & 1) {
2378        tlb_flush(cs);
2379    } else {
2380        tlb_flush_all_cpus_synced(cs);
2381    }
2382}
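
/*
 * Editor's note: the shift counts above (53/42/31/20) pick out the
 * 11-bit index field of the invalidation address for each table level;
 * the fields are 11 bits apart, mirroring the region-first/-second/
 * -third and segment index positions within the virtual address.
 */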
2383
2384/* invalidate pte */
2385void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2386                  uint32_t m4)
2387{
2388    CPUState *cs = env_cpu(env);
2389    const uintptr_t ra = GETPC();
2390    uint64_t page = vaddr & TARGET_PAGE_MASK;
2391    uint64_t pte_addr, pte;
2392
2393    /* Compute the page table entry address */
2394    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2395    pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2396
2397    /* Mark the page table entry as invalid */
2398    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2399    pte |= PAGE_ENTRY_I;
2400    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2401
2402    /* XXX we exploit the fact that Linux passes the exact virtual
2403       address here - it's not obliged to! */
2404    if (m4 & 1) {
2405        if (vaddr & ~VADDR_PAGE_TX_MASK) {
2406            tlb_flush_page(cs, page);
2407            /* XXX 31-bit hack */
2408            tlb_flush_page(cs, page ^ 0x80000000);
2409        } else {
2410            /* looks like we don't have a valid virtual address */
2411            tlb_flush(cs);
2412        }
2413    } else {
2414        if (vaddr & ~VADDR_PAGE_TX_MASK) {
2415            tlb_flush_page_all_cpus_synced(cs, page);
2416            /* XXX 31-bit hack */
2417            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2418        } else {
2419            /* looks like we don't have a valid virtual address */
2420            tlb_flush_all_cpus_synced(cs);
2421        }
2422    }
2423}
2424
2425/* flush local tlb */
2426void HELPER(ptlb)(CPUS390XState *env)
2427{
2428    tlb_flush(env_cpu(env));
2429}
2430
2431/* flush global tlb */
2432void HELPER(purge)(CPUS390XState *env)
2433{
2434    tlb_flush_all_cpus_synced(env_cpu(env));
2435}
2436
2437/* load real address */
2438uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2439{
2440    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2441    uint64_t ret, tec;
2442    int flags, exc, cc;
2443
2444    /* XXX incomplete - has more corner cases */
2445    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2446        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2447    }
2448
2449    exc = mmu_translate(env, addr, 0, asc, &ret, &flags, &tec);
2450    if (exc) {
2451        cc = 3;
2452        ret = exc | 0x80000000;
2453    } else {
2454        cc = 0;
2455        ret |= addr & ~TARGET_PAGE_MASK;
2456    }
2457
2458    env->cc_op = cc;
2459    return ret;
2460}
2461#endif
2462
2463/* load pair from quadword */
2464uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2465{
2466    uintptr_t ra = GETPC();
2467    uint64_t hi, lo;
2468
2469    check_alignment(env, addr, 16, ra);
2470    hi = cpu_ldq_data_ra(env, addr + 0, ra);
2471    lo = cpu_ldq_data_ra(env, addr + 8, ra);
2472
2473    env->retxl = lo;
2474    return hi;
2475}
2476
2477uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2478{
2479    uintptr_t ra = GETPC();
2480    uint64_t hi, lo;
2481    int mem_idx;
2482    TCGMemOpIdx oi;
2483    Int128 v;
2484
2485    assert(HAVE_ATOMIC128);
2486
2487    mem_idx = cpu_mmu_index(env, false);
2488    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2489    v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
2490    hi = int128_gethi(v);
2491    lo = int128_getlo(v);
2492
2493    env->retxl = lo;
2494    return hi;
2495}
2496
2497/* store pair to quadword */
2498void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2499                  uint64_t low, uint64_t high)
2500{
2501    uintptr_t ra = GETPC();
2502
2503    check_alignment(env, addr, 16, ra);
2504    cpu_stq_data_ra(env, addr + 0, high, ra);
2505    cpu_stq_data_ra(env, addr + 8, low, ra);
2506}
2507
2508void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2509                           uint64_t low, uint64_t high)
2510{
2511    uintptr_t ra = GETPC();
2512    int mem_idx;
2513    TCGMemOpIdx oi;
2514    Int128 v;
2515
2516    assert(HAVE_ATOMIC128);
2517
2518    mem_idx = cpu_mmu_index(env, false);
2519    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2520    v = int128_make128(low, high);
2521    helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
2522}
2523
2524/* Execute instruction.  This instruction executes an insn modified with
2525   the contents of r1.  It does not change the executed instruction in memory;
2526   it does not change the program counter.
2527
2528   Perform this by recording the modified instruction in env->ex_value.
2529   This will be noticed by cpu_get_tb_cpu_state and hence by TB translation.
2530*/
2531void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2532{
2533    uint64_t insn = cpu_lduw_code(env, addr);
2534    uint8_t opc = insn >> 8;
2535
2536    /* Or in the contents of R1[56:63].  */
2537    insn |= r1 & 0xff;
2538
2539    /* Load the rest of the instruction.  */
2540    insn <<= 48;
2541    switch (get_ilen(opc)) {
2542    case 2:
2543        break;
2544    case 4:
2545        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2546        break;
2547    case 6:
2548        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2549        break;
2550    default:
2551        g_assert_not_reached();
2552    }
2553
2554    /* The most common cases can be sped up by avoiding a new TB.  */
2555    if ((opc & 0xf0) == 0xd0) {
2556        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2557                                      uint64_t, uintptr_t);
2558        static const dx_helper dx[16] = {
2559            [0x0] = do_helper_trt_bkwd,
2560            [0x2] = do_helper_mvc,
2561            [0x4] = do_helper_nc,
2562            [0x5] = do_helper_clc,
2563            [0x6] = do_helper_oc,
2564            [0x7] = do_helper_xc,
2565            [0xc] = do_helper_tr,
2566            [0xd] = do_helper_trt_fwd,
2567        };
2568        dx_helper helper = dx[opc & 0xf];
2569
2570        if (helper) {
2571            uint32_t l = extract64(insn, 48, 8);
2572            uint32_t b1 = extract64(insn, 44, 4);
2573            uint32_t d1 = extract64(insn, 32, 12);
2574            uint32_t b2 = extract64(insn, 28, 4);
2575            uint32_t d2 = extract64(insn, 16, 12);
2576            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
2577            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);
2578
2579            env->cc_op = helper(env, l, a1, a2, 0);
2580            env->psw.addr += ilen;
2581            return;
2582        }
2583    } else if (opc == 0x0a) {
2584        env->int_svc_code = extract64(insn, 48, 8);
2585        env->int_svc_ilen = ilen;
2586        helper_exception(env, EXCP_SVC);
2587        g_assert_not_reached();
2588    }
2589
2590    /* Record the insn we want to execute as well as the ilen to use
2591       during the execution of the target insn.  This will also ensure
2592       that ex_value is non-zero, which flags that we are in a state
2593       that requires such execution.  */
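    /* Editor's note: the OR below cannot clash: the insn was
       left-justified above, so its low 16 bits are zero and are free
       to carry the ilen (2, 4 or 6).  */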
2594    env->ex_value = insn | ilen;
2595}
2596
2597uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2598                       uint64_t len)
2599{
2600    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2601    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2602    const uint64_t r0 = env->regs[0];
2603    const uintptr_t ra = GETPC();
2604    uint8_t dest_key, dest_as, dest_k, dest_a;
2605    uint8_t src_key, src_as, src_k, src_a;
2606    uint64_t val;
2607    int cc = 0;
2608
2609    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2610               __func__, dest, src, len);
2611
2612    if (!(env->psw.mask & PSW_MASK_DAT)) {
2613        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2614    }
2615
2616    /* OAC (operand access control) for the first operand -> dest */
2617    val = (r0 & 0xffff0000ULL) >> 16;
2618    dest_key = (val >> 12) & 0xf;
2619    dest_as = (val >> 6) & 0x3;
2620    dest_k = (val >> 1) & 0x1;
2621    dest_a = val & 0x1;
2622
2623    /* OAC (operand access control) for the second operand -> src */
2624    val = (r0 & 0x0000ffffULL);
2625    src_key = (val >> 12) & 0xf;
2626    src_as = (val >> 6) & 0x3;
2627    src_k = (val >> 1) & 0x1;
2628    src_a = val & 0x1;
2629
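    /*
     * Editor's note: each OAC halfword decodes as (bit 0 = MSB): bits
     * 0-3 access key, bits 8-9 address-space control, bit 14 key
     * validity (K), bit 15 AS validity (A), matching the shifts above.
     */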
2630    if (!dest_k) {
2631        dest_key = psw_key;
2632    }
2633    if (!src_k) {
2634        src_key = psw_key;
2635    }
2636    if (!dest_a) {
2637        dest_as = psw_as;
2638    }
2639    if (!src_a) {
2640        src_as = psw_as;
2641    }
2642
2643    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2644        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2645    }
2646    if (!(env->cregs[0] & CR0_SECONDARY) &&
2647        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2648        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2649    }
2650    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2651        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2652    }
2653
2654    len = wrap_length32(env, len);
2655    if (len > 4096) {
2656        cc = 3;
2657        len = 4096;
2658    }
2659
2660    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2661    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2662        (env->psw.mask & PSW_MASK_PSTATE)) {
2663        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2664                      __func__);
2665        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2666    }
2667
2668    /* FIXME: Access using correct keys and AR-mode */
2669    if (len) {
2670        S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
2671                                         mmu_idx_from_as(src_as), ra);
2672        S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
2673                                          mmu_idx_from_as(dest_as), ra);
2674
2675        access_memmove(env, &desta, &srca, ra);
2676    }
2677
2678    return cc;
2679}
2680
2681/* Decode a Unicode character.  A return value < 0 indicates success, storing
2682   the UTF-32 result into OCHAR and the input length into OLEN.  A return
2683   value >= 0 indicates failure, and the CC value to be returned.  */
2684typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2685                                 uint64_t ilen, bool enh_check, uintptr_t ra,
2686                                 uint32_t *ochar, uint32_t *olen);
2687
2688/* Encode a Unicode character.  A return value < 0 indicates success, storing
2689   the bytes into ADDR and the output length into OLEN.  A return value >= 0
2690   indicates failure, and the CC value to be returned.  */
2691typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2692                                 uint64_t ilen, uintptr_t ra, uint32_t c,
2693                                 uint32_t *olen);
2694
2695static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2696                       bool enh_check, uintptr_t ra,
2697                       uint32_t *ochar, uint32_t *olen)
2698{
2699    uint8_t s0, s1, s2, s3;
2700    uint32_t c, l;
2701
2702    if (ilen < 1) {
2703        return 0;
2704    }
2705    s0 = cpu_ldub_data_ra(env, addr, ra);
2706    if (s0 <= 0x7f) {
2707        /* one byte character */
2708        l = 1;
2709        c = s0;
2710    } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2711        /* invalid character */
2712        return 2;
2713    } else if (s0 <= 0xdf) {
2714        /* two byte character */
2715        l = 2;
2716        if (ilen < 2) {
2717            return 0;
2718        }
2719        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2720        c = s0 & 0x1f;
2721        c = (c << 6) | (s1 & 0x3f);
2722        if (enh_check && (s1 & 0xc0) != 0x80) {
2723            return 2;
2724        }
2725    } else if (s0 <= 0xef) {
2726        /* three byte character */
2727        l = 3;
2728        if (ilen < 3) {
2729            return 0;
2730        }
2731        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2732        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2733        c = s0 & 0x0f;
2734        c = (c << 6) | (s1 & 0x3f);
2735        c = (c << 6) | (s2 & 0x3f);
2736        /* Fold the byte-by-byte range descriptions in the PoO into
2737           tests against the complete value.  It disallows encodings
2738           that could be smaller, and the UTF-16 surrogates.  */
2739        if (enh_check
2740            && ((s1 & 0xc0) != 0x80
2741                || (s2 & 0xc0) != 0x80
2742                || c < 0x800 /* overlong: 3-byte minimum is U+0800 */
2743                || (c >= 0xd800 && c <= 0xdfff))) {
2744            return 2;
2745        }
2746    } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2747        /* four byte character */
2748        l = 4;
2749        if (ilen < 4) {
2750            return 0;
2751        }
2752        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2753        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2754        s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2755        c = s0 & 0x07;
2756        c = (c << 6) | (s1 & 0x3f);
2757        c = (c << 6) | (s2 & 0x3f);
2758        c = (c << 6) | (s3 & 0x3f);
2759        /* See above.  */
2760        if (enh_check
2761            && ((s1 & 0xc0) != 0x80
2762                || (s2 & 0xc0) != 0x80
2763                || (s3 & 0xc0) != 0x80
2764                || c < 0x010000
2765                || c > 0x10ffff)) {
2766            return 2;
2767        }
2768    } else {
2769        /* invalid character */
2770        return 2;
2771    }
2772
2773    *ochar = c;
2774    *olen = l;
2775    return -1;
2776}
2777
2778static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2779                        bool enh_check, uintptr_t ra,
2780                        uint32_t *ochar, uint32_t *olen)
2781{
2782    uint16_t s0, s1;
2783    uint32_t c, l;
2784
2785    if (ilen < 2) {
2786        return 0;
2787    }
2788    s0 = cpu_lduw_data_ra(env, addr, ra);
2789    if ((s0 & 0xfc00) != 0xd800) {
2790        /* one word character */
2791        l = 2;
2792        c = s0;
2793    } else {
2794        /* two word character */
2795        l = 4;
2796        if (ilen < 4) {
2797            return 0;
2798        }
2799        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2800        c = extract32(s0, 6, 4) + 1;
2801        c = (c << 6) | (s0 & 0x3f);
2802        c = (c << 10) | (s1 & 0x3ff);
2803        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2804            /* invalid surrogate character */
2805            return 2;
2806        }
2807    }
2808
2809    *ochar = c;
2810    *olen = l;
2811    return -1;
2812}
2813
2814static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2815                        bool enh_check, uintptr_t ra,
2816                        uint32_t *ochar, uint32_t *olen)
2817{
2818    uint32_t c;
2819
2820    if (ilen < 4) {
2821        return 0;
2822    }
2823    c = cpu_ldl_data_ra(env, addr, ra);
2824    if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2825        /* invalid unicode character */
2826        return 2;
2827    }
2828
2829    *ochar = c;
2830    *olen = 4;
2831    return -1;
2832}
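
/*
 * Editor's note: the enhanced-check limits used by the decoders above,
 * collected into one hypothetical host-side predicate (not used by the
 * helpers): the minimum values per UTF-8 length are 0x80, 0x800 and
 * 0x10000; the maximum code point is 0x10ffff; surrogates are excluded.
 */
static inline bool utf32_valid_sketch(uint32_t c)
{
    return c <= 0x10ffff && !(c >= 0xd800 && c <= 0xdfff);
}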
2833
2834static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2835                       uintptr_t ra, uint32_t c, uint32_t *olen)
2836{
2837    uint8_t d[4];
2838    uint32_t l, i;
2839
2840    if (c <= 0x7f) {
2841        /* one byte character */
2842        l = 1;
2843        d[0] = c;
2844    } else if (c <= 0x7ff) {
2845        /* two byte character */
2846        l = 2;
2847        d[1] = 0x80 | extract32(c, 0, 6);
2848        d[0] = 0xc0 | extract32(c, 6, 5);
2849    } else if (c <= 0xffff) {
2850        /* three byte character */
2851        l = 3;
2852        d[2] = 0x80 | extract32(c, 0, 6);
2853        d[1] = 0x80 | extract32(c, 6, 6);
2854        d[0] = 0xe0 | extract32(c, 12, 4);
2855    } else {
2856        /* four byte character */
2857        l = 4;
2858        d[3] = 0x80 | extract32(c, 0, 6);
2859        d[2] = 0x80 | extract32(c, 6, 6);
2860        d[1] = 0x80 | extract32(c, 12, 6);
2861        d[0] = 0xf0 | extract32(c, 18, 3);
2862    }
2863
2864    if (ilen < l) {
2865        return 1;
2866    }
2867    for (i = 0; i < l; ++i) {
2868        cpu_stb_data_ra(env, addr + i, d[i], ra);
2869    }
2870
2871    *olen = l;
2872    return -1;
2873}
2874
2875static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2876                        uintptr_t ra, uint32_t c, uint32_t *olen)
2877{
2878    uint16_t d0, d1;
2879
2880    if (c <= 0xffff) {
2881        /* one word character */
2882        if (ilen < 2) {
2883            return 1;
2884        }
2885        cpu_stw_data_ra(env, addr, c, ra);
2886        *olen = 2;
2887    } else {
2888        /* two word character */
2889        if (ilen < 4) {
2890            return 1;
2891        }
2892        d1 = 0xdc00 | extract32(c, 0, 10);
2893        d0 = 0xd800 | extract32(c, 10, 6);
2894        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2895        cpu_stw_data_ra(env, addr + 0, d0, ra);
2896        cpu_stw_data_ra(env, addr + 2, d1, ra);
2897        *olen = 4;
2898    }
2899
2900    return -1;
2901}
2902
2903static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2904                        uintptr_t ra, uint32_t c, uint32_t *olen)
2905{
2906    if (ilen < 4) {
2907        return 1;
2908    }
2909    cpu_stl_data_ra(env, addr, c, ra);
2910    *olen = 4;
2911    return -1;
2912}
2913
2914static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2915                                       uint32_t r2, uint32_t m3, uintptr_t ra,
2916                                       decode_unicode_fn decode,
2917                                       encode_unicode_fn encode)
2918{
2919    uint64_t dst = get_address(env, r1);
2920    uint64_t dlen = get_length(env, r1 + 1);
2921    uint64_t src = get_address(env, r2);
2922    uint64_t slen = get_length(env, r2 + 1);
2923    bool enh_check = m3 & 1;
2924    int cc, i;
2925
2926    /* Lest we fail to service interrupts in a timely manner, limit the
2927       amount of work we're willing to do.  For now, let's cap at 256.  */
2928    for (i = 0; i < 256; ++i) {
2929        uint32_t c, ilen, olen;
2930
2931        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2932        if (unlikely(cc >= 0)) {
2933            break;
2934        }
2935        cc = encode(env, dst, dlen, ra, c, &olen);
2936        if (unlikely(cc >= 0)) {
2937            break;
2938        }
2939
2940        src += ilen;
2941        slen -= ilen;
2942        dst += olen;
2943        dlen -= olen;
2944        cc = 3;
2945    }
2946
2947    set_address(env, r1, dst);
2948    set_length(env, r1 + 1, dlen);
2949    set_address(env, r2, src);
2950    set_length(env, r2 + 1, slen);
2951
2952    return cc;
2953}
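
/*
 * Editor's note: cc summary for the conversion loop above: cc 0 when
 * the source was consumed completely, cc 1 when the destination ran
 * out of room, cc 2 on an invalid source character (enhanced check),
 * and cc 3 when the 256-character cap expired; the registers are
 * updated so that a cc 3 re-execution resumes where this call stopped.
 */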
2954
2955uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2956{
2957    return convert_unicode(env, r1, r2, m3, GETPC(),
2958                           decode_utf8, encode_utf16);
2959}
2960
2961uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2962{
2963    return convert_unicode(env, r1, r2, m3, GETPC(),
2964                           decode_utf8, encode_utf32);
2965}
2966
2967uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2968{
2969    return convert_unicode(env, r1, r2, m3, GETPC(),
2970                           decode_utf16, encode_utf8);
2971}
2972
2973uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2974{
2975    return convert_unicode(env, r1, r2, m3, GETPC(),
2976                           decode_utf16, encode_utf32);
2977}
2978
2979uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2980{
2981    return convert_unicode(env, r1, r2, m3, GETPC(),
2982                           decode_utf32, encode_utf8);
2983}
2984
2985uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2986{
2987    return convert_unicode(env, r1, r2, m3, GETPC(),
2988                           decode_utf32, encode_utf16);
2989}
2990
2991void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2992                        uintptr_t ra)
2993{
2994    /* test the actual access, not just any access to the page due to LAP (low-address protection) */
2995    while (len) {
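        /* Editor's note: -(addr | TARGET_PAGE_MASK) equals
           TARGET_PAGE_SIZE - (addr & ~TARGET_PAGE_MASK), the number of
           bytes from addr to the end of its page. */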
2996        const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2997        const uint64_t curlen = MIN(pagelen, len);
2998
2999        probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
3000        addr = wrap_address(env, addr + curlen);
3001        len -= curlen;
3002    }
3003}
3004
3005void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
3006{
3007    probe_write_access(env, addr, len, GETPC());
3008}
3009