/* qemu/target/s390x/tcg/mem_helper.c */
   1/*
   2 *  S/390 memory access helper routines
   3 *
   4 *  Copyright (c) 2009 Ulrich Hecht
   5 *  Copyright (c) 2009 Alexander Graf
   6 *
   7 * This library is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU Lesser General Public
   9 * License as published by the Free Software Foundation; either
  10 * version 2.1 of the License, or (at your option) any later version.
  11 *
  12 * This library is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 * Lesser General Public License for more details.
  16 *
  17 * You should have received a copy of the GNU Lesser General Public
  18 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21#include "qemu/osdep.h"
  22#include "qemu/log.h"
  23#include "cpu.h"
  24#include "s390x-internal.h"
  25#include "tcg_s390x.h"
  26#include "exec/helper-proto.h"
  27#include "exec/exec-all.h"
  28#include "exec/cpu_ldst.h"
  29#include "qemu/int128.h"
  30#include "qemu/atomic128.h"
  31#include "trace.h"
  32
  33#if !defined(CONFIG_USER_ONLY)
  34#include "hw/s390x/storage-keys.h"
  35#include "hw/boards.h"
  36#endif
  37
/*****************************************************************************/
/* Softmmu support */

/* Uncomment to route per-helper tracing through qemu_log(). */
/* #define DEBUG_HELPER */
#ifdef DEBUG_HELPER
#define HELPER_LOG(x...) qemu_log(x)
#else
#define HELPER_LOG(x...)
#endif
  47
  48static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
  49{
  50    uint16_t pkm = env->cregs[3] >> 16;
  51
  52    if (env->psw.mask & PSW_MASK_PSTATE) {
  53        /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
  54        return pkm & (0x80 >> psw_key);
  55    }
  56    return true;
  57}
  58
  59static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
  60                                   uint64_t src, uint32_t len)
  61{
  62    if (!len || src == dest) {
  63        return false;
  64    }
  65    /* Take care of wrapping at the end of address space. */
  66    if (unlikely(wrap_address(env, src + len - 1) < src)) {
  67        return dest > src || dest <= wrap_address(env, src + len - 1);
  68    }
  69    return dest > src && dest <= src + len - 1;
  70}
  71
  72/* Trigger a SPECIFICATION exception if an address or a length is not
  73   naturally aligned.  */
  74static inline void check_alignment(CPUS390XState *env, uint64_t v,
  75                                   int wordsize, uintptr_t ra)
  76{
  77    if (v % wordsize) {
  78        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
  79    }
  80}
  81
  82/* Load a value from memory according to its size.  */
  83static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
  84                                           int wordsize, uintptr_t ra)
  85{
  86    switch (wordsize) {
  87    case 1:
  88        return cpu_ldub_data_ra(env, addr, ra);
  89    case 2:
  90        return cpu_lduw_data_ra(env, addr, ra);
  91    default:
  92        abort();
  93    }
  94}
  95
  96/* Store a to memory according to its size.  */
  97static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
  98                                      uint64_t value, int wordsize,
  99                                      uintptr_t ra)
 100{
 101    switch (wordsize) {
 102    case 1:
 103        cpu_stb_data_ra(env, addr, value, ra);
 104        break;
 105    case 2:
 106        cpu_stw_data_ra(env, addr, value, ra);
 107        break;
 108    default:
 109        abort();
 110    }
 111}
 112
 113/* An access covers at most 4096 bytes and therefore at most two pages. */
/* An access covers at most 4096 bytes and therefore at most two pages. */
typedef struct S390Access {
    /* Guest virtual address of the part in the first / second page. */
    target_ulong vaddr1;
    target_ulong vaddr2;
    /* Direct host pointers; NULL means the page needs ld/st helper access. */
    char *haddr1;
    char *haddr2;
    /* Number of bytes located in each page; size2 == 0 for a single page. */
    uint16_t size1;
    uint16_t size2;
    /*
     * If we can't access the host page directly, we'll have to do I/O access
     * via ld/st helpers. These are internal details, so we store the
     * mmu idx to do the access here instead of passing it around in the
     * helpers. Maybe, one day we can get rid of ld/st access - once we can
     * handle TLB_NOTDIRTY differently. We don't expect these special accesses
     * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
     * pages, we might trigger a new MMU translation - very unlikely that
     * the mapping changes in between and we would trigger a fault.
     */
    int mmu_idx;
} S390Access;
 133
 134/*
 135 * With nonfault=1, return the PGM_ exception that would have been injected
 136 * into the guest; return 0 if no exception was detected.
 137 *
 * For !CONFIG_USER_ONLY, the TEC is stored to env->tlb_fill_tec.
 139 * For CONFIG_USER_ONLY, the faulting address is stored to env->__excp_addr.
 140 */
static int s390_probe_access(CPUArchState *env, target_ulong addr, int size,
                             MMUAccessType access_type, int mmu_idx,
                             bool nonfault, void **phost, uintptr_t ra)
{
#if defined(CONFIG_USER_ONLY)
    return probe_access_flags(env, addr, access_type, mmu_idx,
                              nonfault, phost, ra);
#else
    int flags;

    /*
     * For !CONFIG_USER_ONLY, we cannot rely on TLB_INVALID_MASK or haddr==NULL
     * to detect if there was an exception during tlb_fill().
     */
    env->tlb_fill_exc = 0;
    flags = probe_access_flags(env, addr, access_type, mmu_idx, nonfault, phost,
                               ra);
    if (env->tlb_fill_exc) {
        /* Translation failed; hand the PGM_ code back to the caller. */
        return env->tlb_fill_exc;
    }

    if (unlikely(flags & TLB_WATCHPOINT)) {
        /* S390 does not presently use transaction attributes. */
        cpu_check_watchpoint(env_cpu(env), addr, size,
                             MEMTXATTRS_UNSPECIFIED,
                             (access_type == MMU_DATA_STORE
                              ? BP_MEM_WRITE : BP_MEM_READ), ra);
    }
    return 0;
#endif
}
 172
 173static int access_prepare_nf(S390Access *access, CPUS390XState *env,
 174                             bool nonfault, vaddr vaddr1, int size,
 175                             MMUAccessType access_type,
 176                             int mmu_idx, uintptr_t ra)
 177{
 178    void *haddr1, *haddr2 = NULL;
 179    int size1, size2, exc;
 180    vaddr vaddr2 = 0;
 181
 182    assert(size > 0 && size <= 4096);
 183
 184    size1 = MIN(size, -(vaddr1 | TARGET_PAGE_MASK)),
 185    size2 = size - size1;
 186
 187    exc = s390_probe_access(env, vaddr1, size1, access_type, mmu_idx, nonfault,
 188                            &haddr1, ra);
 189    if (exc) {
 190        return exc;
 191    }
 192    if (unlikely(size2)) {
 193        /* The access crosses page boundaries. */
 194        vaddr2 = wrap_address(env, vaddr1 + size1);
 195        exc = s390_probe_access(env, vaddr2, size2, access_type, mmu_idx,
 196                                nonfault, &haddr2, ra);
 197        if (exc) {
 198            return exc;
 199        }
 200    }
 201
 202    *access = (S390Access) {
 203        .vaddr1 = vaddr1,
 204        .vaddr2 = vaddr2,
 205        .haddr1 = haddr1,
 206        .haddr2 = haddr2,
 207        .size1 = size1,
 208        .size2 = size2,
 209        .mmu_idx = mmu_idx
 210    };
 211    return 0;
 212}
 213
 214static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
 215                                 MMUAccessType access_type, int mmu_idx,
 216                                 uintptr_t ra)
 217{
 218    S390Access ret;
 219    int exc = access_prepare_nf(&ret, env, false, vaddr, size,
 220                                access_type, mmu_idx, ra);
 221    assert(!exc);
 222    return ret;
 223}
 224
 225/* Helper to handle memset on a single page. */
/* Helper to handle memset on a single page. */
static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
                             uint8_t byte, uint16_t size, int mmu_idx,
                             uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    /* User-only always has a direct host mapping. */
    g_assert(haddr);
    memset(haddr, byte, size);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    int i;

    if (likely(haddr)) {
        /* Fast path: direct host access. */
        memset(haddr, byte, size);
    } else {
        /*
         * Do a single access and test if we can then get access to the
         * page. This is especially relevant to speed up TLB_NOTDIRTY.
         */
        g_assert(size > 0);
        cpu_stb_mmu(env, vaddr, byte, oi, ra);
        haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
        if (likely(haddr)) {
            /* First byte already written above, fill the rest directly. */
            memset(haddr + 1, byte, size - 1);
        } else {
            /* Still no direct access (e.g. MMIO): store byte by byte. */
            for (i = 1; i < size; i++) {
                cpu_stb_mmu(env, vaddr + i, byte, oi, ra);
            }
        }
    }
#endif
}
 257
 258static void access_memset(CPUS390XState *env, S390Access *desta,
 259                          uint8_t byte, uintptr_t ra)
 260{
 261
 262    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
 263                     desta->mmu_idx, ra);
 264    if (likely(!desta->size2)) {
 265        return;
 266    }
 267    do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
 268                     desta->mmu_idx, ra);
 269}
 270
/* Read one byte at @vaddr + @offset on a single page, refreshing *haddr. */
static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                                  int offset, int mmu_idx, uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    return ldub_p(*haddr + offset);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
    uint8_t byte;

    if (likely(*haddr)) {
        /* Fast path: direct host access. */
        return ldub_p(*haddr + offset);
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    byte = cpu_ldb_mmu(env, vaddr + offset, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
    return byte;
#endif
}
 292
 293static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
 294                               int offset, uintptr_t ra)
 295{
 296    if (offset < access->size1) {
 297        return do_access_get_byte(env, access->vaddr1, &access->haddr1,
 298                                  offset, access->mmu_idx, ra);
 299    }
 300    return do_access_get_byte(env, access->vaddr2, &access->haddr2,
 301                              offset - access->size1, access->mmu_idx, ra);
 302}
 303
/* Write one byte at @vaddr + @offset on a single page, refreshing *haddr. */
static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
                               int offset, uint8_t byte, int mmu_idx,
                               uintptr_t ra)
{
#ifdef CONFIG_USER_ONLY
    stb_p(*haddr + offset, byte);
#else
    MemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

    if (likely(*haddr)) {
        /* Fast path: direct host access. */
        stb_p(*haddr + offset, byte);
        return;
    }
    /*
     * Do a single access and test if we can then get access to the
     * page. This is especially relevant to speed up TLB_NOTDIRTY.
     */
    cpu_stb_mmu(env, vaddr + offset, byte, oi, ra);
    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
#endif
}
 325
 326static void access_set_byte(CPUS390XState *env, S390Access *access,
 327                            int offset, uint8_t byte, uintptr_t ra)
 328{
 329    if (offset < access->size1) {
 330        do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
 331                           access->mmu_idx, ra);
 332    } else {
 333        do_access_set_byte(env, access->vaddr2, &access->haddr2,
 334                           offset - access->size1, byte, access->mmu_idx, ra);
 335    }
 336}
 337
/*
 * Move data with the same semantics as memmove() in case ranges don't overlap
 * or src > dest. Undefined behavior on destructive overlaps.
 */
static void access_memmove(CPUS390XState *env, S390Access *desta,
                           S390Access *srca, uintptr_t ra)
{
    int diff;

    /* Source and destination must describe ranges of equal length. */
    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);

    /* Fallback to slow access in case we don't have access to all host pages */
    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
        int i;

        for (i = 0; i < desta->size1 + desta->size2; i++) {
            uint8_t byte = access_get_byte(env, srca, i, ra);

            access_set_byte(env, desta, i, byte, ra);
        }
        return;
    }

    /*
     * Fast path: up to three memmove() calls depending on how the source
     * and destination page splits line up against each other.
     */
    if (srca->size1 == desta->size1) {
        /* Both ranges split at the same offset. */
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        if (unlikely(srca->size2)) {
            memmove(desta->haddr2, srca->haddr2, srca->size2);
        }
    } else if (srca->size1 < desta->size1) {
        /* Source crosses its page boundary before the destination does. */
        diff = desta->size1 - srca->size1;
        memmove(desta->haddr1, srca->haddr1, srca->size1);
        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
        if (likely(desta->size2)) {
            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
        }
    } else {
        /* Destination crosses its page boundary before the source does. */
        diff = srca->size1 - desta->size1;
        memmove(desta->haddr1, srca->haddr1, desta->size1);
        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
        if (likely(srca->size2)) {
            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
        }
    }
}
 383
 384static int mmu_idx_from_as(uint8_t as)
 385{
 386    switch (as) {
 387    case AS_PRIMARY:
 388        return MMU_PRIMARY_IDX;
 389    case AS_SECONDARY:
 390        return MMU_SECONDARY_IDX;
 391    case AS_HOME:
 392        return MMU_HOME_IDX;
 393    default:
 394        /* FIXME AS_ACCREG */
 395        g_assert_not_reached();
 396    }
 397}
 398
/* and on array */
static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* NC always processes one more byte than specified - maximum is 256 */
    l++;

    /* Probe all operands up front so exceptions fire before any store. */
    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) &
                          access_get_byte(env, &srca2, i, ra);

        /* Accumulate for the condition code: CC 1 iff any result byte != 0. */
        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}
 426
/* NC: AND the @src operand into @dest; GETPC() must be taken here. */
uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}
 432
/* xor on array */
static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* XC always processes one more byte than specified - maximum is 256 */
    l++;

    /* Probe all operands up front so exceptions fire before any store. */
    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /* xor with itself is the same as memset(0) */
    if (src == dest) {
        access_memset(env, &desta, 0, ra);
        return 0;
    }

    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
                          access_get_byte(env, &srca2, i, ra);

        /* Accumulate for the condition code: CC 1 iff any result byte != 0. */
        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}
 467
/* XC: XOR the @src operand into @dest; GETPC() must be taken here. */
uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}
 473
/* or on array */
static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
                             uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uint32_t i;
    uint8_t c = 0;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* OC always processes one more byte than specified - maximum is 256 */
    l++;

    /* Probe all operands up front so exceptions fire before any store. */
    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        const uint8_t x = access_get_byte(env, &srca1, i, ra) |
                          access_get_byte(env, &srca2, i, ra);

        /* Accumulate for the condition code: CC 1 iff any result byte != 0. */
        c |= x;
        access_set_byte(env, &desta, i, x, ra);
    }
    return c != 0;
}
 501
/* OC: OR the @src operand into @dest; GETPC() must be taken here. */
uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}
 507
/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* MVC always copies one more byte than specified - maximum is 256 */
    l++;

    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /*
     * "When the operands overlap, the result is obtained as if the operands
     * were processed one byte at a time". Only non-destructive overlaps
     * behave like memmove().
     */
    if (dest == src + 1) {
        /* Byte-wise propagation of the first byte == memset with it. */
        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
    } else if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
    } else {
        /* Destructive overlap: emulate the architected byte-at-a-time copy. */
        for (i = 0; i < l; i++) {
            uint8_t byte = access_get_byte(env, &srca, i, ra);

            access_set_byte(env, &desta, i, byte, ra);
        }
    }

    /* MVC does not change the condition code. */
    return env->cc_op;
}
 544
/* MVC: move (copy) @l+1 bytes from @src to @dest; GETPC() taken here. */
void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}
 549
/* move right to left */
void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t ra = GETPC();
    S390Access srca, desta;
    int32_t i;

    /* MVCRL always copies one more byte than specified - maximum is 256 */
    l++;

    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);

    /* Copy from the highest offset down, as the instruction mandates. */
    for (i = l - 1; i >= 0; i--) {
        uint8_t byte = access_get_byte(env, &srca, i, ra);
        access_set_byte(env, &desta, i, byte, ra);
    }
}
 569
/* move inverse  */
void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVCIN always copies one more byte than specified - maximum is 256 */
    l++;

    /* The source address designates the rightmost byte; rebase to the left. */
    src = wrap_address(env, src - l + 1);
    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        /* Copy bytes in reversed order. */
        const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);

        access_set_byte(env, &desta, i, x, ra);
    }
}
 590
/* move numerics  */
void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVN always copies one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        /* Low nibble (numeric) from src, high nibble (zone) kept from dest. */
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
                          (access_get_byte(env, &srca2, i, ra) & 0xf0);

        access_set_byte(env, &desta, i, x, ra);
    }
}
 612
/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    /* MVO always processes one more byte than specified - maximum is 16 */
    const int len_dest = (l >> 4) + 1;
    const int len_src = (l & 0xf) + 1;
    uintptr_t ra = GETPC();
    uint8_t byte_dest, byte_src;
    S390Access srca, desta;
    int i, j;

    srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);

    /* Handle rightmost byte: keep its low nibble, shift in src's low nibble. */
    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
        /* Carry the high nibble of the previous source byte downward. */
        byte_dest = byte_src >> 4;
        if (j >= 0) {
            byte_src = access_get_byte(env, &srca, j, ra);
        } else {
            /* Source exhausted: pad with zeros. */
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        access_set_byte(env, &desta, i, byte_dest, ra);
    }
}
 646
/* move zones  */
void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    S390Access srca1, srca2, desta;
    uintptr_t ra = GETPC();
    int i;

    /* MVZ always copies one more byte than specified - maximum is 256 */
    l++;

    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < l; i++) {
        /* High nibble (zone) from src, low nibble (numeric) kept from dest. */
        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
                          (access_get_byte(env, &srca2, i, ra) & 0x0f);

        access_set_byte(env, &desta, i, x, ra);
    }
}
 668
/* compare unsigned byte arrays */
static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
                              uint64_t s2, uintptr_t ra)
{
    uint32_t i;
    uint32_t cc = 0;

    HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
               __func__, l, s1, s2);

    /* CLC compares l + 1 bytes, hence i <= l. */
    for (i = 0; i <= l; i++) {
        uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
        uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
        HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
        if (x < y) {
            /* First operand low. */
            cc = 1;
            break;
        } else if (x > y) {
            /* First operand high. */
            cc = 2;
            break;
        }
    }

    HELPER_LOG("\n");
    /* cc == 0: operands equal. */
    return cc;
}
 695
/* CLC: compare logical character; GETPC() must be taken here. */
uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}
 700
/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    /* Walk the 4-bit mask left to right; each set bit selects one r1 byte. */
    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            /* Current leftmost selected byte of r1. */
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            /* Only advance the storage address for selected bytes. */
            addr++;
        }
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    /* cc == 0: all selected bytes equal. */
    return cc;
}
 733
/* Read @reg and wrap it according to the current addressing mode. */
static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}
 738
 739/*
 740 * Store the address to the given register, zeroing out unused leftmost
 741 * bits in bit positions 32-63 (24-bit and 31-bit mode only).
 742 */
 743static inline void set_address_zero(CPUS390XState *env, int reg,
 744                                    uint64_t address)
 745{
 746    if (env->psw.mask & PSW_MASK_64) {
 747        env->regs[reg] = address;
 748    } else {
 749        if (!(env->psw.mask & PSW_MASK_32)) {
 750            address &= 0x00ffffff;
 751        } else {
 752            address &= 0x7fffffff;
 753        }
 754        env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
 755    }
 756}
 757
/*
 * Store the address to the given register, following the PoO-permitted
 * handling of the leftmost bits in 24-bit and 31-bit mode (see below).
 */
static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}
 780
 781static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
 782{
 783    if (!(env->psw.mask & PSW_MASK_64)) {
 784        return (uint32_t)length;
 785    }
 786    return length;
 787}
 788
 789static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
 790{
 791    if (!(env->psw.mask & PSW_MASK_64)) {
 792        /* 24-Bit and 31-Bit mode */
 793        length &= 0x7fffffff;
 794    }
 795    return length;
 796}
 797
/* Read a length from @reg, wrapped for the current addressing mode. */
static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length31(env, env->regs[reg]);
}
 802
 803static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
 804{
 805    if (env->psw.mask & PSW_MASK_64) {
 806        /* 64-Bit mode */
 807        env->regs[reg] = length;
 808    } else {
 809        /* 24-Bit and 31-Bit mode */
 810        env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
 811    }
 812}
 813
/* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0];

    /* Bits 32-55 of R0 must all be 0, otherwise: specification exception. */
    if (env->regs[0] & 0xffffff00u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
 851
/* search string unicode (c is the 16-bit character to search for) */
void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero, otherwise: specification exception.  */
    if (env->regs[0] & 0xffff0000u) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
 891
/* unsigned string compare (c is string terminator) */
uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    /* Only the rightmost byte of R0 is the terminator. */
    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                env->retxl = s2;
                return s1;
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            env->retxl = s2 + len;
            return s1 + len;
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    env->retxl = s2 + len;
    return s1 + len;
}
 929
 930/* move page */
 931uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint32_t r1, uint32_t r2)
 932{
 933    const uint64_t src = get_address(env, r2) & TARGET_PAGE_MASK;
 934    const uint64_t dst = get_address(env, r1) & TARGET_PAGE_MASK;
 935    const int mmu_idx = cpu_mmu_index(env, false);
 936    const bool f = extract64(r0, 11, 1);
 937    const bool s = extract64(r0, 10, 1);
 938    const bool cco = extract64(r0, 8, 1);
 939    uintptr_t ra = GETPC();
 940    S390Access srca, desta;
 941    int exc;
 942
 943    if ((f && s) || extract64(r0, 12, 4)) {
 944        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
 945    }
 946
 947    /*
 948     * We always manually handle exceptions such that we can properly store
 949     * r1/r2 to the lowcore on page-translation exceptions.
 950     *
 951     * TODO: Access key handling
 952     */
 953    exc = access_prepare_nf(&srca, env, true, src, TARGET_PAGE_SIZE,
 954                            MMU_DATA_LOAD, mmu_idx, ra);
 955    if (exc) {
 956        if (cco) {
 957            return 2;
 958        }
 959        goto inject_exc;
 960    }
 961    exc = access_prepare_nf(&desta, env, true, dst, TARGET_PAGE_SIZE,
 962                            MMU_DATA_STORE, mmu_idx, ra);
 963    if (exc) {
 964        if (cco && exc != PGM_PROTECTION) {
 965            return 1;
 966        }
 967        goto inject_exc;
 968    }
 969    access_memmove(env, &desta, &srca, ra);
 970    return 0; /* data moved */
 971inject_exc:
 972#if !defined(CONFIG_USER_ONLY)
 973    if (exc != PGM_ADDRESSING) {
 974        stq_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, trans_exc_code),
 975                 env->tlb_fill_tec);
 976    }
 977    if (exc == PGM_PAGE_TRANS) {
 978        stb_phys(env_cpu(env)->as, env->psa + offsetof(LowCore, op_access_id),
 979                 r1 << 4 | r2);
 980    }
 981#endif
 982    tcg_s390_program_interrupt(env, exc, ra);
 983}
 984
/*
 * string copy (MVST): copy bytes from R2 to R1 up to and including the
 * terminator character in R0.  Returns cc 1 when the terminator was copied
 * (R1 updated to its address) or cc 3 after a CPU-determined amount
 * (both registers advanced; re-execute to continue).
 */
uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    const uint64_t d = get_address(env, r1);
    const uint64_t s = get_address(env, r2);
    /* The terminator is the low byte of R0.  */
    const uint8_t c = env->regs[0];
    /* Largest chunk that crosses neither the source nor the dest page.  */
    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
    S390Access srca, desta;
    uintptr_t ra = GETPC();
    int i;

    /* Bits 32-55 of R0 must be zero.  */
    if (env->regs[0] & 0xffffff00ull) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    /*
     * Our access should not exceed single pages, as we must not report access
     * exceptions exceeding the actually copied range (which we don't know at
     * this point). We might over-indicate watchpoints within the pages
     * (if we ever care, we have to limit processing to a single byte).
     */
    srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
    desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
    for (i = 0; i < len; i++) {
        const uint8_t v = access_get_byte(env, &srca, i, ra);

        access_set_byte(env, &desta, i, v, ra);
        if (v == c) {
            set_address_zero(env, r1, d + i);
            return 1;
        }
    }
    set_address_zero(env, r1, d + len);
    set_address_zero(env, r2, s + len);
    return 3;
}
1022
1023/* load access registers r1 to r3 from memory at a2 */
1024void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1025{
1026    uintptr_t ra = GETPC();
1027    int i;
1028
1029    if (a2 & 0x3) {
1030        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1031    }
1032
1033    for (i = r1;; i = (i + 1) % 16) {
1034        env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
1035        a2 += 4;
1036
1037        if (i == r3) {
1038            break;
1039        }
1040    }
1041}
1042
1043/* store access registers r1 to r3 in memory at a2 */
1044void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1045{
1046    uintptr_t ra = GETPC();
1047    int i;
1048
1049    if (a2 & 0x3) {
1050        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1051    }
1052
1053    for (i = r1;; i = (i + 1) % 16) {
1054        cpu_stl_data_ra(env, a2, env->aregs[i], ra);
1055        a2 += 4;
1056
1057        if (i == r3) {
1058            break;
1059        }
1060    }
1061}
1062
/*
 * move long helper: one CPU-determined step of MVCLE (wordsize == 1) or
 * MVCLU (wordsize == 2).  Either copies from the source or pads the
 * destination, never crossing a page boundary, and advances the address
 * and length cursors in place.  Returns the condition code; 3 means work
 * is left and the instruction must be re-executed.
 */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
    S390Access srca, desta;
    int i, cc;

    /* The final cc is derived from the operand lengths before this step.  */
    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    if (!*destlen) {
        return cc;
    }

    /*
     * Only perform one type of operation (move/pad) at a time.
     * Stay within single pages.
     */
    if (*srclen) {
        /* Copy the src array */
        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
        *destlen -= len;
        *srclen -= len;
        srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memmove(env, &desta, &srca, ra);
        *src = wrap_address(env, *src + len);
        *dest = wrap_address(env, *dest + len);
    } else if (wordsize == 1) {
        /* Pad the remaining area */
        *destlen -= len;
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
        access_memset(env, &desta, pad, ra);
        *dest = wrap_address(env, *dest + len);
    } else {
        /* MVCLU: pad with the 16-bit character, alternating its two bytes. */
        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);

        /* The remaining length selects the padding byte. */
        for (i = 0; i < len; (*destlen)--, i++) {
            if (*destlen & 1) {
                access_set_byte(env, &desta, i, pad, ra);
            } else {
                access_set_byte(env, &desta, i, pad >> 8, ra);
            }
        }
        *dest = wrap_address(env, *dest + len);
    }

    return *destlen ? 3 : cc;
}
1122
/*
 * move long (MVCL): copy/pad up to 2^24 - 1 bytes between the operands
 * described by the R1/R1+1 and R2/R2+1 register pairs.  The low 24 bits
 * of Rx+1 hold the length; the next byte of R2+1 is the padding byte.
 * All cursors are written back to the registers as the copy progresses,
 * so the operation can be interrupted and resumed.
 */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    /* Destructive overlap yields cc 3 and no data movement.  */
    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            /* Source exhausted: pad the rest of the destination.  */
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memset(env, &desta, pad, ra);
        } else {
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
                                  ra);
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}
1196
1197/* move long extended */
1198uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1199                       uint32_t r3)
1200{
1201    uintptr_t ra = GETPC();
1202    uint64_t destlen = get_length(env, r1 + 1);
1203    uint64_t dest = get_address(env, r1);
1204    uint64_t srclen = get_length(env, r3 + 1);
1205    uint64_t src = get_address(env, r3);
1206    uint8_t pad = a2;
1207    uint32_t cc;
1208
1209    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1210
1211    set_length(env, r1 + 1, destlen);
1212    set_length(env, r3 + 1, srclen);
1213    set_address(env, r1, dest);
1214    set_address(env, r3, src);
1215
1216    return cc;
1217}
1218
1219/* move long unicode */
1220uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1221                       uint32_t r3)
1222{
1223    uintptr_t ra = GETPC();
1224    uint64_t destlen = get_length(env, r1 + 1);
1225    uint64_t dest = get_address(env, r1);
1226    uint64_t srclen = get_length(env, r3 + 1);
1227    uint64_t src = get_address(env, r3);
1228    uint16_t pad = a2;
1229    uint32_t cc;
1230
1231    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1232
1233    set_length(env, r1 + 1, destlen);
1234    set_length(env, r3 + 1, srclen);
1235    set_address(env, r1, dest);
1236    set_address(env, r3, src);
1237
1238    return cc;
1239}
1240
/*
 * compare logical long helper
 *
 * Compare the two operands element-wise (wordsize is 1 or 2 bytes).  Once
 * an operand is exhausted, 'pad' is substituted for its elements and its
 * address/length cursor is no longer advanced.  At most 'limit' bytes are
 * compared per call; if the limit cuts the comparison short, cc 3 is
 * returned so the caller re-executes the instruction.
 */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    /* Both lengths must be a multiple of the element size.  */
    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        /* Only advance the cursors of operands that still have data.  */
        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}
1292
1293
1294/* compare logical long */
1295uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1296{
1297    uintptr_t ra = GETPC();
1298    uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1299    uint64_t src1 = get_address(env, r1);
1300    uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1301    uint64_t src3 = get_address(env, r2);
1302    uint8_t pad = env->regs[r2 + 1] >> 24;
1303    uint32_t cc;
1304
1305    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1306
1307    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1308    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1309    set_address(env, r1, src1);
1310    set_address(env, r2, src3);
1311
1312    return cc;
1313}
1314
1315/* compare logical long extended memcompare insn with padding */
1316uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1317                       uint32_t r3)
1318{
1319    uintptr_t ra = GETPC();
1320    uint64_t src1len = get_length(env, r1 + 1);
1321    uint64_t src1 = get_address(env, r1);
1322    uint64_t src3len = get_length(env, r3 + 1);
1323    uint64_t src3 = get_address(env, r3);
1324    uint8_t pad = a2;
1325    uint32_t cc;
1326
1327    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1328
1329    set_length(env, r1 + 1, src1len);
1330    set_length(env, r3 + 1, src3len);
1331    set_address(env, r1, src1);
1332    set_address(env, r3, src3);
1333
1334    return cc;
1335}
1336
1337/* compare logical long unicode memcompare insn with padding */
1338uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1339                       uint32_t r3)
1340{
1341    uintptr_t ra = GETPC();
1342    uint64_t src1len = get_length(env, r1 + 1);
1343    uint64_t src1 = get_address(env, r1);
1344    uint64_t src3len = get_length(env, r3 + 1);
1345    uint64_t src3 = get_address(env, r3);
1346    uint16_t pad = a2;
1347    uint32_t cc = 0;
1348
1349    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1350
1351    set_length(env, r1 + 1, src1len);
1352    set_length(env, r3 + 1, src3len);
1353    set_address(env, r1, src1);
1354    set_address(env, r3, src3);
1355
1356    return cc;
1357}
1358
/*
 * checksum (CKSM): accumulate a 32-bit checksum with end-around-carry
 * folding over up to 8k bytes of the source.  Returns the number of bytes
 * processed; the folded checksum is passed back via env->retxl and the
 * condition code via env->cc_op (0 = done, 3 = more to process).
 */
uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                      uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    /* The running checksum starts from the low 32 bits of R1.  */
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    /* A trailing partial word is left-aligned and zero-extended.  */
    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    env->retxl = cksm;
    return len;
}
1405
/*
 * PACK: convert a zoned-decimal field into packed-decimal format.  The
 * high nibble of 'len' is the destination end offset, the low nibble the
 * source end offset; both operands are processed right to left.
 */
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest > 0) {
        b = 0;

        /* Each output byte takes the low nibbles of up to two src bytes.  */
        if (len_src >= 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        if (len_src >= 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
1442
/*
 * Pack a decimal string into a 16-byte packed destination.  'ssize' is the
 * size of one source element (1 for PKA, 2 for PKU); digits are taken from
 * the low nibble of each element, processed right to left.  The rightmost
 * output nibble is forced to the positive sign code 0xc.
 */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        /* Second digit of this output byte, if source remains.  */
        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
1476
1477
1478void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1479                 uint32_t srclen)
1480{
1481    do_pkau(env, dest, src, srclen, 1, GETPC());
1482}
1483
1484void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1485                 uint32_t srclen)
1486{
1487    do_pkau(env, dest, src, srclen, 2, GETPC());
1488}
1489
/*
 * UNPK: convert a packed-decimal field back to zoned format.  The high
 * nibble of 'len' is the destination end offset, the low nibble the source
 * end offset; operands are processed right to left and each output digit
 * receives a 0xf zone.
 */
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    int second_nibble = 0;

    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
1536
1537static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1538                                 uint32_t destlen, int dsize, uint64_t src,
1539                                 uintptr_t ra)
1540{
1541    int i;
1542    uint32_t cc;
1543    uint8_t b;
1544    /* The source operand is always 16 bytes long.  */
1545    const int srclen = 16;
1546
1547    /* The operands are processed from right to left.  */
1548    src += srclen - 1;
1549    dest += destlen - dsize;
1550
1551    /* Check for the sign.  */
1552    b = cpu_ldub_data_ra(env, src, ra);
1553    src--;
1554    switch (b & 0xf) {
1555    case 0xa:
1556    case 0xc:
1557    case 0xe ... 0xf:
1558        cc = 0;  /* plus */
1559        break;
1560    case 0xb:
1561    case 0xd:
1562        cc = 1;  /* minus */
1563        break;
1564    default:
1565    case 0x0 ... 0x9:
1566        cc = 3;  /* invalid */
1567        break;
1568    }
1569
1570    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1571    for (i = 0; i < destlen; i += dsize) {
1572        if (i == (31 * dsize)) {
1573            /* If length is 32/64 bytes, the leftmost byte is 0. */
1574            b = 0;
1575        } else if (i % (2 * dsize)) {
1576            b = cpu_ldub_data_ra(env, src, ra);
1577            src--;
1578        } else {
1579            b >>= 4;
1580        }
1581        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1582        dest -= dsize;
1583    }
1584
1585    return cc;
1586}
1587
1588uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1589                       uint64_t src)
1590{
1591    return do_unpkau(env, dest, destlen, 1, src, GETPC());
1592}
1593
1594uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1595                       uint64_t src)
1596{
1597    return do_unpkau(env, dest, destlen, 2, src, GETPC());
1598}
1599
1600uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1601{
1602    uintptr_t ra = GETPC();
1603    uint32_t cc = 0;
1604    int i;
1605
1606    for (i = 0; i < destlen; i++) {
1607        uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1608        /* digit */
1609        cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1610
1611        if (i == (destlen - 1)) {
1612            /* sign */
1613            cc |= (b & 0xf) < 0xa ? 1 : 0;
1614        } else {
1615            /* digit */
1616            cc |= (b & 0xf) > 0x9 ? 2 : 0;
1617        }
1618    }
1619
1620    return cc;
1621}
1622
1623static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1624                             uint64_t trans, uintptr_t ra)
1625{
1626    uint32_t i;
1627
1628    for (i = 0; i <= len; i++) {
1629        uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1630        uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1631        cpu_stb_data_ra(env, array + i, new_byte, ra);
1632    }
1633
1634    return env->cc_op;
1635}
1636
1637void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1638                uint64_t trans)
1639{
1640    do_helper_tr(env, len, array, trans, GETPC());
1641}
1642
/*
 * TRE (translate extended): translate bytes of 'array' in place through
 * the table at 'trans', stopping at the test byte held in the low byte of
 * R0.  Returns the updated array address; the remaining length is passed
 * back via env->retxl and the condition code via env->cc_op (0 = done,
 * 1 = test byte found, 3 = CPU-determined amount processed).
 */
uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
                     uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    /* In 31-bit mode, addresses and lengths are truncated.  */
    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        if (byte == end) {
            /* Test byte reached: stop without translating it.  */
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    /* Remaining length is relative to the original (untruncated) length. */
    env->retxl = len - i;
    return array + i;
}
1682
/*
 * TRT/TRTR core: scan len + 1 bytes of 'array' (inc = +1 forward for TRT,
 * -1 backward for TRTR), looking up each byte in the 256-byte function
 * table at 'trans'.  On the first nonzero function byte, GR1 is set to the
 * operand address, the low byte of GR2 to the function byte, and 1 is
 * returned (2 if the match is on the last byte).  Returns 0 otherwise.
 */
static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
                                     uint64_t array, uint64_t trans,
                                     int inc, uintptr_t ra)
{
    int i;

    for (i = 0; i <= len; i++) {
        uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
        uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);

        if (sbyte != 0) {
            set_address(env, 1, array + i * inc);
            env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
            return (i == len) ? 2 : 1;
        }
    }

    return 0;
}
1702
1703static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1704                                  uint64_t array, uint64_t trans,
1705                                  uintptr_t ra)
1706{
1707    return do_helper_trt(env, len, array, trans, 1, ra);
1708}
1709
1710uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1711                     uint64_t trans)
1712{
1713    return do_helper_trt(env, len, array, trans, 1, GETPC());
1714}
1715
1716static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1717                                   uint64_t array, uint64_t trans,
1718                                   uintptr_t ra)
1719{
1720    return do_helper_trt(env, len, array, trans, -1, ra);
1721}
1722
1723uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1724                      uint64_t trans)
1725{
1726    return do_helper_trt(env, len, array, trans, -1, GETPC());
1727}
1728
/*
 * Translate one/two to one/two (TROO/TROT/TRTO/TRTT).  'sizes' selects the
 * destination (bit 0) and source (bit 1) element sizes; the translation
 * table address comes from GR1, the destination from R1 (length in R1+1)
 * and the source from R2.  cc 1 is returned when the test character 'tst'
 * is produced, cc 0 when the whole operand was translated, and cc 3 when
 * the per-invocation work budget expires first.
 */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    int dsize = (sizes & 1) ? 1 : 2;
    int ssize = (sizes & 2) ? 1 : 2;
    uint64_t tbl = get_address(env, 1);
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;
    int i;

    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    /* The length must be a multiple of the source element size.  */
    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do.   */
    for (i = 0; i < 0x2000; i++) {
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        if (dval == tst) {
            /* Test character found; it is not stored.  */
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        if (len == 0) {
            cc = 0;
            break;
        }
    }

    /* Write the updated cursors back for a possible re-execution.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}
1782
/*
 * CDSG, serial version: 128-bit compare-and-swap of the doubleword pair at
 * 'addr' against the r1/r1+1 register pair, storing the r3/r3+1 pair on a
 * match.  The old value is always written back to r1/r1+1 and the result
 * (0 = swapped, 1 = mismatch) to env->cc_op.  The load/compare/store
 * sequence here is not atomic; see cdsg_parallel for the atomic variant.
 */
void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
                  uint32_t r1, uint32_t r3)
{
    uintptr_t ra = GETPC();
    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
    Int128 oldv;
    uint64_t oldh, oldl;
    bool fail;

    /* The operand must be quadword aligned.  */
    check_alignment(env, addr, 16, ra);

    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
    oldl = cpu_ldq_data_ra(env, addr + 8, ra);

    oldv = int128_make128(oldl, oldh);
    fail = !int128_eq(oldv, cmpv);
    if (fail) {
        /* On mismatch, store back the old value unchanged.  */
        newv = oldv;
    }

    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);

    env->cc_op = fail;
    env->regs[r1] = int128_gethi(oldv);
    env->regs[r1 + 1] = int128_getlo(oldv);
}
1811
/*
 * CDSG, parallel version: as cdsg above, but performed as one atomic
 * 16-byte aligned compare-and-swap.  Only reachable when the host supports
 * cmpxchg128 (asserted below).
 */
void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
                           uint32_t r1, uint32_t r3)
{
    uintptr_t ra = GETPC();
    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
    int mem_idx;
    MemOpIdx oi;
    Int128 oldv;
    bool fail;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    /* MO_ALIGN raises the specification exception for unaligned operands. */
    oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
    oldv = cpu_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
    fail = !int128_eq(oldv, cmpv);

    env->cc_op = fail;
    env->regs[r1] = int128_gethi(oldv);
    env->regs[r1 + 1] = int128_getlo(oldv);
}
1834
/*
 * COMPARE AND SWAP AND STORE: an interlocked compare-and-swap on the
 * first operand (A1) followed by a store into the second operand (A2).
 *
 * FC (function code) selects the compare-and-swap width (0 = 4 bytes,
 * 1 = 8 bytes, 2 = 16 bytes) and SC the store size as log2 of the byte
 * count (0..4); both are taken from general register 0.  The parameter
 * list, addressed by general register 1 (16-byte aligned), supplies the
 * swap value (at offset 0) and the store value (at offset 16).
 *
 * Returns the condition code: 0 if the comparison succeeded (swap and
 * store performed), 1 if it failed (old value loaded into R3).
 * Raises a specification exception for invalid FC/SC combinations or
 * misaligned operands.
 */
static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
                        uint64_t a2, bool parallel)
{
    uint32_t mem_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint32_t fc = extract32(env->regs[0], 0, 8);
    uint32_t sc = extract32(env->regs[0], 8, 8);
    /* Parameter-list address from GR1, forced to 16-byte alignment.  */
    uint64_t pl = get_address(env, 1) & -16;
    uint64_t svh, svl;
    uint32_t cc;

    /* Sanity check the function code and storage characteristic.  */
    if (fc > 1 || sc > 3) {
        /* The extended FC=2 / SC=4 forms require the CSST2 facility.  */
        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
            goto spec_exception;
        }
        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
            goto spec_exception;
        }
    }

    /* Sanity check the alignments.  */
    if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
        goto spec_exception;
    }

    /* Sanity check writability of the store address.  */
    probe_write(env, a2, 1 << sc, mem_idx, ra);

    /*
     * Note that the compare-and-swap is atomic, and the store is atomic,
     * but the complete operation is not.  Therefore we do not need to
     * assert serial context in order to implement this.  That said,
     * restart early if we can't support either operation that is supposed
     * to be atomic.
     */
    if (parallel) {
        uint32_t max = 2;
#ifdef CONFIG_ATOMIC64
        max = 3;
#endif
        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
            (HAVE_ATOMIC128  ? 0 : sc > max)) {
            cpu_loop_exit_atomic(env_cpu(env), ra);
        }
    }

    /* All loads happen before all stores.  For simplicity, load the entire
       store value area from the parameter list.  */
    svh = cpu_ldq_data_ra(env, pl + 16, ra);
    svl = cpu_ldq_data_ra(env, pl + 24, ra);

    switch (fc) {
    case 0:
        /* 4-byte compare-and-swap.  */
        {
            uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
            uint32_t cv = env->regs[r3];
            uint32_t ov;

            if (parallel) {
#ifdef CONFIG_USER_ONLY
                uint32_t *haddr = g2h(env_cpu(env), a1);
                ov = qatomic_cmpxchg__nocheck(haddr, cv, nv);
#else
                MemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
                ov = cpu_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
#endif
            } else {
                ov = cpu_ldl_data_ra(env, a1, ra);
                cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
            }
            cc = (ov != cv);
            /* The old value replaces bits 32-63 of R3.  */
            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
        }
        break;

    case 1:
        /* 8-byte compare-and-swap.  */
        {
            uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
            uint64_t cv = env->regs[r3];
            uint64_t ov;

            if (parallel) {
#ifdef CONFIG_ATOMIC64
                MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN, mem_idx);
                ov = cpu_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
#else
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
#endif
            } else {
                ov = cpu_ldq_data_ra(env, a1, ra);
                cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
            }
            cc = (ov != cv);
            env->regs[r3] = ov;
        }
        break;

    case 2:
        /* 16-byte compare-and-swap, using the even/odd pair R3/R3+1.  */
        {
            uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
            uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
            Int128 nv = int128_make128(nvl, nvh);
            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
            Int128 ov;

            if (!parallel) {
                uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
                uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);

                ov = int128_make128(ol, oh);
                cc = !int128_eq(ov, cv);
                if (cc) {
                    nv = ov;
                }

                cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
                cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
            } else if (HAVE_CMPXCHG128) {
                MemOpIdx oi = make_memop_idx(MO_TE | MO_128 | MO_ALIGN, mem_idx);
                ov = cpu_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
                cc = !int128_eq(ov, cv);
            } else {
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
            }

            env->regs[r3 + 0] = int128_gethi(ov);
            env->regs[r3 + 1] = int128_getlo(ov);
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Store only if the comparison succeeded.  Note that above we use a pair
       of 64-bit big-endian loads, so for sc < 3 we must extract the value
       from the most-significant bits of svh.  */
    if (cc == 0) {
        switch (sc) {
        case 0:
            cpu_stb_data_ra(env, a2, svh >> 56, ra);
            break;
        case 1:
            cpu_stw_data_ra(env, a2, svh >> 48, ra);
            break;
        case 2:
            cpu_stl_data_ra(env, a2, svh >> 32, ra);
            break;
        case 3:
            cpu_stq_data_ra(env, a2, svh, ra);
            break;
        case 4:
            if (!parallel) {
                cpu_stq_data_ra(env, a2 + 0, svh, ra);
                cpu_stq_data_ra(env, a2 + 8, svl, ra);
            } else if (HAVE_ATOMIC128) {
                MemOpIdx oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
                Int128 sv = int128_make128(svl, svh);
                cpu_atomic_sto_be_mmu(env, a2, sv, oi, ra);
            } else {
                /* Note that we asserted !parallel above.  */
                g_assert_not_reached();
            }
            break;
        default:
            g_assert_not_reached();
        }
    }

    return cc;

 spec_exception:
    tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
}
2012
2013uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
2014{
2015    return do_csst(env, r3, a1, a2, false);
2016}
2017
2018uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
2019                               uint64_t a2)
2020{
2021    return do_csst(env, r3, a1, a2, true);
2022}
2023
2024#if !defined(CONFIG_USER_ONLY)
2025void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2026{
2027    uintptr_t ra = GETPC();
2028    bool PERchanged = false;
2029    uint64_t src = a2;
2030    uint32_t i;
2031
2032    if (src & 0x7) {
2033        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2034    }
2035
2036    for (i = r1;; i = (i + 1) % 16) {
2037        uint64_t val = cpu_ldq_data_ra(env, src, ra);
2038        if (env->cregs[i] != val && i >= 9 && i <= 11) {
2039            PERchanged = true;
2040        }
2041        env->cregs[i] = val;
2042        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
2043                   i, src, val);
2044        src += sizeof(uint64_t);
2045
2046        if (i == r3) {
2047            break;
2048        }
2049    }
2050
2051    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2052        s390_cpu_recompute_watchpoints(env_cpu(env));
2053    }
2054
2055    tlb_flush(env_cpu(env));
2056}
2057
2058void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2059{
2060    uintptr_t ra = GETPC();
2061    bool PERchanged = false;
2062    uint64_t src = a2;
2063    uint32_t i;
2064
2065    if (src & 0x3) {
2066        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2067    }
2068
2069    for (i = r1;; i = (i + 1) % 16) {
2070        uint32_t val = cpu_ldl_data_ra(env, src, ra);
2071        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
2072            PERchanged = true;
2073        }
2074        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
2075        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
2076        src += sizeof(uint32_t);
2077
2078        if (i == r3) {
2079            break;
2080        }
2081    }
2082
2083    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
2084        s390_cpu_recompute_watchpoints(env_cpu(env));
2085    }
2086
2087    tlb_flush(env_cpu(env));
2088}
2089
2090void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2091{
2092    uintptr_t ra = GETPC();
2093    uint64_t dest = a2;
2094    uint32_t i;
2095
2096    if (dest & 0x7) {
2097        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2098    }
2099
2100    for (i = r1;; i = (i + 1) % 16) {
2101        cpu_stq_data_ra(env, dest, env->cregs[i], ra);
2102        dest += sizeof(uint64_t);
2103
2104        if (i == r3) {
2105            break;
2106        }
2107    }
2108}
2109
2110void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2111{
2112    uintptr_t ra = GETPC();
2113    uint64_t dest = a2;
2114    uint32_t i;
2115
2116    if (dest & 0x3) {
2117        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2118    }
2119
2120    for (i = r1;; i = (i + 1) % 16) {
2121        cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2122        dest += sizeof(uint32_t);
2123
2124        if (i == r3) {
2125            break;
2126        }
2127    }
2128}
2129
2130uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2131{
2132    uintptr_t ra = GETPC();
2133    int i;
2134
2135    real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2136
2137    for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2138        cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2139    }
2140
2141    return 0;
2142}
2143
/*
 * TEST PROTECTION: probe whether the storage at A1 can be stored into
 * and/or fetched from, without taking the protection exception itself.
 *
 * Returns the condition code:
 *   0 - fetching and storing permitted (or translation exception
 *       forwarded to the guest)
 *   1 - fetching permitted, storing not permitted
 *   2 - neither fetching nor storing permitted
 *   3 - translation not available
 */
uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
{
    S390CPU *cpu = env_archcpu(env);
    CPUState *cs = env_cpu(env);

    /*
     * TODO: we currently don't handle all access protection types
     * (including access-list and key-controlled) as well as AR mode.
     */
    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
        /* Fetching permitted; storing permitted */
        return 0;
    }

    if (env->int_pgm_code == PGM_PROTECTION) {
        /* retry if reading is possible */
        /* Clear the pending exception from the failed write probe.  */
        cs->exception_index = -1;
        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
            /* Fetching permitted; storing not permitted */
            return 1;
        }
    }

    switch (env->int_pgm_code) {
    case PGM_PROTECTION:
        /* Fetching not permitted; storing not permitted */
        cs->exception_index = -1;
        return 2;
    case PGM_ADDRESSING:
    case PGM_TRANS_SPEC:
        /* exceptions forwarded to the guest */
        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
        return 0;
    }

    /* Translation not available */
    cs->exception_index = -1;
    return 3;
}
2183
/* insert storage key extended */
/*
 * ISKE: return the storage key of the absolute page addressed by R2.
 * Returns 0 if the skeys device reports an error.
 */
uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
{
    /* Cache the skeys device lookup across calls.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            /* First enablement of skeys: drop any stale TLB state.  */
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_get_skeys_nonzero(rc);
        return 0;
    }
    return key;
}
2213
/* set storage key extended */
/*
 * SSKE: set the storage key of the absolute page addressed by R2 from
 * the low byte of R1 (bit 0 is masked off before storing).
 */
void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
    /* Cache the skeys device lookup across calls.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;
    int rc;

    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            /* First enablement of skeys: drop any stale TLB state.  */
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    key = r1 & 0xfe;
    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_set_skeys_nonzero(rc);
    }
   /*
    * As we can only flush by virtual address and not all the entries
    * that point to a physical address we have to flush the whole TLB.
    */
    tlb_flush_all_cpus_synced(env_cpu(env));
}
2247
/* reset reference bit extended */
/*
 * RRBE: clear the reference bit in the storage key of the absolute page
 * addressed by R2, returning a condition code derived from the previous
 * reference and change bits (see table below).  Returns 0 if the skeys
 * device reports an error.
 */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    uint64_t addr = wrap_address(env, r2);
    /* Cache the skeys device lookup across calls.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;
    int rc;

    addr = mmu_real2abs(env, addr);
    if (!mmu_absolute_addr_valid(addr, false)) {
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, GETPC());
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
        if (skeyclass->enable_skeys && !skeyclass->enable_skeys(ss)) {
            /* First enablement of skeys: drop any stale TLB state.  */
            tlb_flush_all_cpus_synced(env_cpu(env));
        }
    }

    rc = skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_get_skeys_nonzero(rc);
        return 0;
    }

    /* Remember the old R and C bits, then clear only R.  */
    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    rc = skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
    if (rc) {
        trace_set_skeys_nonzero(rc);
        return 0;
    }
   /*
    * As we can only flush by virtual address and not all the entries
    * that point to a physical address we have to flush the whole TLB.
    */
    tlb_flush_all_cpus_synced(env_cpu(env));

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    return re >> 1;
}
2301
2302uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2303{
2304    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2305    S390Access srca, desta;
2306    uintptr_t ra = GETPC();
2307    int cc = 0;
2308
2309    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2310               __func__, l, a1, a2);
2311
2312    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2313        psw_as == AS_HOME || psw_as == AS_ACCREG) {
2314        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2315    }
2316
2317    l = wrap_length32(env, l);
2318    if (l > 256) {
2319        /* max 256 */
2320        l = 256;
2321        cc = 3;
2322    } else if (!l) {
2323        return cc;
2324    }
2325
2326    /* TODO: Access key handling */
2327    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2328    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2329    access_memmove(env, &desta, &srca, ra);
2330    return cc;
2331}
2332
2333uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2334{
2335    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2336    S390Access srca, desta;
2337    uintptr_t ra = GETPC();
2338    int cc = 0;
2339
2340    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2341               __func__, l, a1, a2);
2342
2343    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2344        psw_as == AS_HOME || psw_as == AS_ACCREG) {
2345        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2346    }
2347
2348    l = wrap_length32(env, l);
2349    if (l > 256) {
2350        /* max 256 */
2351        l = 256;
2352        cc = 3;
2353    } else if (!l) {
2354        return cc;
2355    }
2356
2357    /* TODO: Access key handling */
2358    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2359    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2360    access_memmove(env, &desta, &srca, ra);
2361    return cc;
2362}
2363
/*
 * IDTE: invalidate DAT table entries.  R1 holds the table origin and
 * designation type, R2 the effective address plus the additional-entries
 * count (bits 0-10) and the clearing-control bit (0x800).  M4 bit 0
 * selects a local-only TLB flush.  R3 is ignored because we always
 * flush the entire TLB.
 */
void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    /* Bits 44-51 of R2 must be zero.  */
    if (r2 & 0xff000) {
        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & ASCE_ORIGIN;
        entries = (r2 & 0x7ff) + 1;

        /* The table level determines which bits of R2 index it.  */
        switch (r1 & ASCE_TYPE_MASK) {
        case ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
            if (!(entry & REGION_ENTRY_I)) {
                /* we are allowed to not store if already invalid */
                entry |= REGION_ENTRY_I;
                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}
2413
/* invalidate pte */
/*
 * IPTE: mark the page table entry for VADDR (within the page table at
 * PTO) invalid, then flush the corresponding TLB entries.  M4 bit 0
 * selects a local-only flush.
 */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = env_cpu(env);
    const uintptr_t ra = GETPC();
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
    pte_addr += VADDR_PAGE_TX(vaddr) * 8;

    /* Mark the page table entry as invalid */
    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
    pte |= PAGE_ENTRY_I;
    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);

    /* XXX we exploit the fact that Linux passes the exact virtual
       address here - it's not obliged to! */
    if (m4 & 1) {
        /* Local-TLB-clearing form: flush only this CPU.  */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush(cs);
        }
    } else {
        /* Broadcast form: flush all CPUs synchronously.  */
        if (vaddr & ~VADDR_PAGE_TX_MASK) {
            tlb_flush_page_all_cpus_synced(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush_all_cpus_synced(cs);
        }
    }
}
2454
2455/* flush local tlb */
2456void HELPER(ptlb)(CPUS390XState *env)
2457{
2458    tlb_flush(env_cpu(env));
2459}
2460
2461/* flush global tlb */
2462void HELPER(purge)(CPUS390XState *env)
2463{
2464    tlb_flush_all_cpus_synced(env_cpu(env));
2465}
2466
2467/* load real address */
2468uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2469{
2470    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2471    uint64_t ret, tec;
2472    int flags, exc, cc;
2473
2474    /* XXX incomplete - has more corner cases */
2475    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2476        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2477    }
2478
2479    exc = mmu_translate(env, addr, MMU_S390_LRA, asc, &ret, &flags, &tec);
2480    if (exc) {
2481        cc = 3;
2482        ret = exc | 0x80000000;
2483    } else {
2484        cc = 0;
2485        ret |= addr & ~TARGET_PAGE_MASK;
2486    }
2487
2488    env->cc_op = cc;
2489    return ret;
2490}
2491#endif
2492
2493/* load pair from quadword */
2494uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2495{
2496    uintptr_t ra = GETPC();
2497    uint64_t hi, lo;
2498
2499    check_alignment(env, addr, 16, ra);
2500    hi = cpu_ldq_data_ra(env, addr + 0, ra);
2501    lo = cpu_ldq_data_ra(env, addr + 8, ra);
2502
2503    env->retxl = lo;
2504    return hi;
2505}
2506
2507uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2508{
2509    uintptr_t ra = GETPC();
2510    uint64_t hi, lo;
2511    int mem_idx;
2512    MemOpIdx oi;
2513    Int128 v;
2514
2515    assert(HAVE_ATOMIC128);
2516
2517    mem_idx = cpu_mmu_index(env, false);
2518    oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2519    v = cpu_atomic_ldo_be_mmu(env, addr, oi, ra);
2520    hi = int128_gethi(v);
2521    lo = int128_getlo(v);
2522
2523    env->retxl = lo;
2524    return hi;
2525}
2526
2527/* store pair to quadword */
2528void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2529                  uint64_t low, uint64_t high)
2530{
2531    uintptr_t ra = GETPC();
2532
2533    check_alignment(env, addr, 16, ra);
2534    cpu_stq_data_ra(env, addr + 0, high, ra);
2535    cpu_stq_data_ra(env, addr + 8, low, ra);
2536}
2537
2538void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2539                           uint64_t low, uint64_t high)
2540{
2541    uintptr_t ra = GETPC();
2542    int mem_idx;
2543    MemOpIdx oi;
2544    Int128 v;
2545
2546    assert(HAVE_ATOMIC128);
2547
2548    mem_idx = cpu_mmu_index(env, false);
2549    oi = make_memop_idx(MO_TEUQ | MO_ALIGN_16, mem_idx);
2550    v = int128_make128(low, high);
2551    cpu_atomic_sto_be_mmu(env, addr, v, oi, ra);
2552}
2553
/* Execute instruction.  This instruction executes an insn modified with
   the contents of r1.  It does not change the executed instruction in memory;
   it does not change the program counter.

   Perform this by recording the modified instruction in env->ex_value.
   This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
*/
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    /* Fetch the first halfword of the target instruction.  */
    uint64_t insn = cpu_lduw_code(env, addr);
    uint8_t opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  */
    /* The (up to 6-byte) instruction is kept left-justified in insn.  */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    if ((opc & 0xf0) == 0xd0) {
        /* SS-format storage-to-storage insns: dispatch directly to the
           corresponding helper instead of retranslating.  */
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x0] = do_helper_trt_bkwd,
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
            [0xd] = do_helper_trt_fwd,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Decode the SS-format length and base+displacement fields.  */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            uint64_t a1 = wrap_address(env, (b1 ? env->regs[b1] : 0) + d1);
            uint64_t a2 = wrap_address(env, (b2 ? env->regs[b2] : 0) + d2);

            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* SVC: raise the supervisor-call exception directly.  */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    env->ex_value = insn | ilen;
}
2626
/*
 * MOVE WITH OPTIONAL SPECIFICATIONS: copy up to 4096 bytes from SRC to
 * DEST, with per-operand access keys and address-space controls taken
 * from the OAC fields in general register 0.  Returns cc 0, or cc 3
 * when the requested length exceeded 4096 and was truncated.
 *
 * AR mode and problem-state key handling are not implemented (see the
 * FIXMEs below).
 */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    /* MVCOS requires DAT to be enabled.  */
    if (!(env->psw.mask & PSW_MASK_DAT)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }

    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;
    dest_as = (val >> 6) & 0x3;
    dest_k = (val >> 1) & 0x1;    /* key-validity bit */
    dest_a = val & 0x1;           /* AS-validity bit */

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;     /* key-validity bit */
    src_a = val & 0x1;            /* AS-validity bit */

    /* When an OAC field is not marked valid, fall back to the PSW.  */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    /* Home-space destination is privileged.  */
    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
    }
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
    }

    len = wrap_length32(env, len);
    if (len > 4096) {
        /* Only 4k is moved per execution; cc 3 reports truncation.  */
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
    }

    /* FIXME: Access using correct keys and AR-mode */
    if (len) {
        S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
                                         mmu_idx_from_as(src_as), ra);
        S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
                                          mmu_idx_from_as(dest_as), ra);

        access_memmove(env, &desta, &srca, ra);
    }

    return cc;
}
2710
2711/* Decode a Unicode character.  A return value < 0 indicates success, storing
2712   the UTF-32 result into OCHAR and the input length into OLEN.  A return
2713   value >= 0 indicates failure, and the CC value to be returned.  */
2714typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2715                                 uint64_t ilen, bool enh_check, uintptr_t ra,
2716                                 uint32_t *ochar, uint32_t *olen);
2717
2718/* Encode a Unicode character.  A return value < 0 indicates success, storing
2719   the bytes into ADDR and the output length into OLEN.  A return value >= 0
2720   indicates failure, and the CC value to be returned.  */
2721typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2722                                 uint64_t ilen, uintptr_t ra, uint32_t c,
2723                                 uint32_t *olen);
2724
2725static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2726                       bool enh_check, uintptr_t ra,
2727                       uint32_t *ochar, uint32_t *olen)
2728{
2729    uint8_t s0, s1, s2, s3;
2730    uint32_t c, l;
2731
2732    if (ilen < 1) {
2733        return 0;
2734    }
2735    s0 = cpu_ldub_data_ra(env, addr, ra);
2736    if (s0 <= 0x7f) {
2737        /* one byte character */
2738        l = 1;
2739        c = s0;
2740    } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2741        /* invalid character */
2742        return 2;
2743    } else if (s0 <= 0xdf) {
2744        /* two byte character */
2745        l = 2;
2746        if (ilen < 2) {
2747            return 0;
2748        }
2749        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2750        c = s0 & 0x1f;
2751        c = (c << 6) | (s1 & 0x3f);
2752        if (enh_check && (s1 & 0xc0) != 0x80) {
2753            return 2;
2754        }
2755    } else if (s0 <= 0xef) {
2756        /* three byte character */
2757        l = 3;
2758        if (ilen < 3) {
2759            return 0;
2760        }
2761        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2762        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2763        c = s0 & 0x0f;
2764        c = (c << 6) | (s1 & 0x3f);
2765        c = (c << 6) | (s2 & 0x3f);
2766        /* Fold the byte-by-byte range descriptions in the PoO into
2767           tests against the complete value.  It disallows encodings
2768           that could be smaller, and the UTF-16 surrogates.  */
2769        if (enh_check
2770            && ((s1 & 0xc0) != 0x80
2771                || (s2 & 0xc0) != 0x80
2772                || c < 0x1000
2773                || (c >= 0xd800 && c <= 0xdfff))) {
2774            return 2;
2775        }
2776    } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2777        /* four byte character */
2778        l = 4;
2779        if (ilen < 4) {
2780            return 0;
2781        }
2782        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2783        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2784        s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2785        c = s0 & 0x07;
2786        c = (c << 6) | (s1 & 0x3f);
2787        c = (c << 6) | (s2 & 0x3f);
2788        c = (c << 6) | (s3 & 0x3f);
2789        /* See above.  */
2790        if (enh_check
2791            && ((s1 & 0xc0) != 0x80
2792                || (s2 & 0xc0) != 0x80
2793                || (s3 & 0xc0) != 0x80
2794                || c < 0x010000
2795                || c > 0x10ffff)) {
2796            return 2;
2797        }
2798    } else {
2799        /* invalid character */
2800        return 2;
2801    }
2802
2803    *ochar = c;
2804    *olen = l;
2805    return -1;
2806}
2807
2808static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2809                        bool enh_check, uintptr_t ra,
2810                        uint32_t *ochar, uint32_t *olen)
2811{
2812    uint16_t s0, s1;
2813    uint32_t c, l;
2814
2815    if (ilen < 2) {
2816        return 0;
2817    }
2818    s0 = cpu_lduw_data_ra(env, addr, ra);
2819    if ((s0 & 0xfc00) != 0xd800) {
2820        /* one word character */
2821        l = 2;
2822        c = s0;
2823    } else {
2824        /* two word character */
2825        l = 4;
2826        if (ilen < 4) {
2827            return 0;
2828        }
2829        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2830        c = extract32(s0, 6, 4) + 1;
2831        c = (c << 6) | (s0 & 0x3f);
2832        c = (c << 10) | (s1 & 0x3ff);
2833        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2834            /* invalid surrogate character */
2835            return 2;
2836        }
2837    }
2838
2839    *ochar = c;
2840    *olen = l;
2841    return -1;
2842}
2843
2844static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2845                        bool enh_check, uintptr_t ra,
2846                        uint32_t *ochar, uint32_t *olen)
2847{
2848    uint32_t c;
2849
2850    if (ilen < 4) {
2851        return 0;
2852    }
2853    c = cpu_ldl_data_ra(env, addr, ra);
2854    if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2855        /* invalid unicode character */
2856        return 2;
2857    }
2858
2859    *ochar = c;
2860    *olen = 4;
2861    return -1;
2862}
2863
2864static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2865                       uintptr_t ra, uint32_t c, uint32_t *olen)
2866{
2867    uint8_t d[4];
2868    uint32_t l, i;
2869
2870    if (c <= 0x7f) {
2871        /* one byte character */
2872        l = 1;
2873        d[0] = c;
2874    } else if (c <= 0x7ff) {
2875        /* two byte character */
2876        l = 2;
2877        d[1] = 0x80 | extract32(c, 0, 6);
2878        d[0] = 0xc0 | extract32(c, 6, 5);
2879    } else if (c <= 0xffff) {
2880        /* three byte character */
2881        l = 3;
2882        d[2] = 0x80 | extract32(c, 0, 6);
2883        d[1] = 0x80 | extract32(c, 6, 6);
2884        d[0] = 0xe0 | extract32(c, 12, 4);
2885    } else {
2886        /* four byte character */
2887        l = 4;
2888        d[3] = 0x80 | extract32(c, 0, 6);
2889        d[2] = 0x80 | extract32(c, 6, 6);
2890        d[1] = 0x80 | extract32(c, 12, 6);
2891        d[0] = 0xf0 | extract32(c, 18, 3);
2892    }
2893
2894    if (ilen < l) {
2895        return 1;
2896    }
2897    for (i = 0; i < l; ++i) {
2898        cpu_stb_data_ra(env, addr + i, d[i], ra);
2899    }
2900
2901    *olen = l;
2902    return -1;
2903}
2904
2905static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2906                        uintptr_t ra, uint32_t c, uint32_t *olen)
2907{
2908    uint16_t d0, d1;
2909
2910    if (c <= 0xffff) {
2911        /* one word character */
2912        if (ilen < 2) {
2913            return 1;
2914        }
2915        cpu_stw_data_ra(env, addr, c, ra);
2916        *olen = 2;
2917    } else {
2918        /* two word character */
2919        if (ilen < 4) {
2920            return 1;
2921        }
2922        d1 = 0xdc00 | extract32(c, 0, 10);
2923        d0 = 0xd800 | extract32(c, 10, 6);
2924        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2925        cpu_stw_data_ra(env, addr + 0, d0, ra);
2926        cpu_stw_data_ra(env, addr + 2, d1, ra);
2927        *olen = 4;
2928    }
2929
2930    return -1;
2931}
2932
2933static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2934                        uintptr_t ra, uint32_t c, uint32_t *olen)
2935{
2936    if (ilen < 4) {
2937        return 1;
2938    }
2939    cpu_stl_data_ra(env, addr, c, ra);
2940    *olen = 4;
2941    return -1;
2942}
2943
2944static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2945                                       uint32_t r2, uint32_t m3, uintptr_t ra,
2946                                       decode_unicode_fn decode,
2947                                       encode_unicode_fn encode)
2948{
2949    uint64_t dst = get_address(env, r1);
2950    uint64_t dlen = get_length(env, r1 + 1);
2951    uint64_t src = get_address(env, r2);
2952    uint64_t slen = get_length(env, r2 + 1);
2953    bool enh_check = m3 & 1;
2954    int cc, i;
2955
2956    /* Lest we fail to service interrupts in a timely manner, limit the
2957       amount of work we're willing to do.  For now, let's cap at 256.  */
2958    for (i = 0; i < 256; ++i) {
2959        uint32_t c, ilen, olen;
2960
2961        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2962        if (unlikely(cc >= 0)) {
2963            break;
2964        }
2965        cc = encode(env, dst, dlen, ra, c, &olen);
2966        if (unlikely(cc >= 0)) {
2967            break;
2968        }
2969
2970        src += ilen;
2971        slen -= ilen;
2972        dst += olen;
2973        dlen -= olen;
2974        cc = 3;
2975    }
2976
2977    set_address(env, r1, dst);
2978    set_length(env, r1 + 1, dlen);
2979    set_address(env, r2, src);
2980    set_length(env, r2 + 1, slen);
2981
2982    return cc;
2983}
2984
/* CU12: convert a UTF-8 source operand to UTF-16. */
uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf16);
}
2990
/* CU14: convert a UTF-8 source operand to UTF-32. */
uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf32);
}
2996
/* CU21: convert a UTF-16 source operand to UTF-8. */
uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf8);
}
3002
/* CU24: convert a UTF-16 source operand to UTF-32. */
uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf32);
}
3008
/* CU41: convert a UTF-32 source operand to UTF-8. */
uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf8);
}
3014
/* CU42: convert a UTF-32 source operand to UTF-16. */
uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf16);
}
3020
3021void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
3022                        uintptr_t ra)
3023{
3024    /* test the actual access, not just any access to the page due to LAP */
3025    while (len) {
3026        const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
3027        const uint64_t curlen = MIN(pagelen, len);
3028
3029        probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
3030        addr = wrap_address(env, addr + curlen);
3031        len -= curlen;
3032    }
3033}
3034
/* TCG helper entry point: GETPC() must be captured here, in the function
   called directly from generated code, before forwarding.  */
void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
{
    probe_write_access(env, addr, len, GETPC());
}
3039