qemu/target/s390x/mem_helper.c
<<
>>
Prefs
   1/*
   2 *  S/390 memory access helper routines
   3 *
   4 *  Copyright (c) 2009 Ulrich Hecht
   5 *  Copyright (c) 2009 Alexander Graf
   6 *
   7 * This library is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU Lesser General Public
   9 * License as published by the Free Software Foundation; either
  10 * version 2.1 of the License, or (at your option) any later version.
  11 *
  12 * This library is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 * Lesser General Public License for more details.
  16 *
  17 * You should have received a copy of the GNU Lesser General Public
  18 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21#include "qemu/osdep.h"
  22#include "cpu.h"
  23#include "internal.h"
  24#include "tcg_s390x.h"
  25#include "exec/helper-proto.h"
  26#include "exec/exec-all.h"
  27#include "exec/cpu_ldst.h"
  28#include "qemu/int128.h"
  29#include "qemu/atomic128.h"
  30#include "tcg/tcg.h"
  31
  32#if !defined(CONFIG_USER_ONLY)
  33#include "hw/s390x/storage-keys.h"
  34#endif
  35
  36/*****************************************************************************/
  37/* Softmmu support */
  38
  39/* #define DEBUG_HELPER */
  40#ifdef DEBUG_HELPER
  41#define HELPER_LOG(x...) qemu_log(x)
  42#else
  43#define HELPER_LOG(x...)
  44#endif
  45
  46static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
  47{
  48    uint16_t pkm = env->cregs[3] >> 16;
  49
  50    if (env->psw.mask & PSW_MASK_PSTATE) {
  51        /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
  52        return pkm & (0x80 >> psw_key);
  53    }
  54    return true;
  55}
  56
  57static bool is_destructive_overlap(CPUS390XState *env, uint64_t dest,
  58                                   uint64_t src, uint32_t len)
  59{
  60    if (!len || src == dest) {
  61        return false;
  62    }
  63    /* Take care of wrapping at the end of address space. */
  64    if (unlikely(wrap_address(env, src + len - 1) < src)) {
  65        return dest > src || dest <= wrap_address(env, src + len - 1);
  66    }
  67    return dest > src && dest <= src + len - 1;
  68}
  69
  70/* Trigger a SPECIFICATION exception if an address or a length is not
  71   naturally aligned.  */
  72static inline void check_alignment(CPUS390XState *env, uint64_t v,
  73                                   int wordsize, uintptr_t ra)
  74{
  75    if (v % wordsize) {
  76        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
  77    }
  78}
  79
  80/* Load a value from memory according to its size.  */
  81static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
  82                                           int wordsize, uintptr_t ra)
  83{
  84    switch (wordsize) {
  85    case 1:
  86        return cpu_ldub_data_ra(env, addr, ra);
  87    case 2:
  88        return cpu_lduw_data_ra(env, addr, ra);
  89    default:
  90        abort();
  91    }
  92}
  93
  94/* Store a to memory according to its size.  */
  95static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
  96                                      uint64_t value, int wordsize,
  97                                      uintptr_t ra)
  98{
  99    switch (wordsize) {
 100    case 1:
 101        cpu_stb_data_ra(env, addr, value, ra);
 102        break;
 103    case 2:
 104        cpu_stw_data_ra(env, addr, value, ra);
 105        break;
 106    default:
 107        abort();
 108    }
 109}
 110
 111/* An access covers at most 4096 bytes and therefore at most two pages. */
 112typedef struct S390Access {
 113    target_ulong vaddr1;
 114    target_ulong vaddr2;
 115    char *haddr1;
 116    char *haddr2;
 117    uint16_t size1;
 118    uint16_t size2;
 119    /*
 120     * If we can't access the host page directly, we'll have to do I/O access
 121     * via ld/st helpers. These are internal details, so we store the
 122     * mmu idx to do the access here instead of passing it around in the
 123     * helpers. Maybe, one day we can get rid of ld/st access - once we can
 124     * handle TLB_NOTDIRTY differently. We don't expect these special accesses
 125     * to trigger exceptions - only if we would have TLB_NOTDIRTY on LAP
 126     * pages, we might trigger a new MMU translation - very unlikely that
 127     * the mapping changes in between and we would trigger a fault.
 128     */
 129    int mmu_idx;
 130} S390Access;
 131
 132static S390Access access_prepare(CPUS390XState *env, vaddr vaddr, int size,
 133                                 MMUAccessType access_type, int mmu_idx,
 134                                 uintptr_t ra)
 135{
 136    S390Access access = {
 137        .vaddr1 = vaddr,
 138        .size1 = MIN(size, -(vaddr | TARGET_PAGE_MASK)),
 139        .mmu_idx = mmu_idx,
 140    };
 141
 142    g_assert(size > 0 && size <= 4096);
 143    access.haddr1 = probe_access(env, access.vaddr1, access.size1, access_type,
 144                                 mmu_idx, ra);
 145
 146    if (unlikely(access.size1 != size)) {
 147        /* The access crosses page boundaries. */
 148        access.vaddr2 = wrap_address(env, vaddr + access.size1);
 149        access.size2 = size - access.size1;
 150        access.haddr2 = probe_access(env, access.vaddr2, access.size2,
 151                                     access_type, mmu_idx, ra);
 152    }
 153    return access;
 154}
 155
 156/* Helper to handle memset on a single page. */
 157static void do_access_memset(CPUS390XState *env, vaddr vaddr, char *haddr,
 158                             uint8_t byte, uint16_t size, int mmu_idx,
 159                             uintptr_t ra)
 160{
 161#ifdef CONFIG_USER_ONLY
 162    g_assert(haddr);
 163    memset(haddr, byte, size);
 164#else
 165    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
 166    int i;
 167
 168    if (likely(haddr)) {
 169        memset(haddr, byte, size);
 170    } else {
 171        /*
 172         * Do a single access and test if we can then get access to the
 173         * page. This is especially relevant to speed up TLB_NOTDIRTY.
 174         */
 175        g_assert(size > 0);
 176        helper_ret_stb_mmu(env, vaddr, byte, oi, ra);
 177        haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
 178        if (likely(haddr)) {
 179            memset(haddr + 1, byte, size - 1);
 180        } else {
 181            for (i = 1; i < size; i++) {
 182                helper_ret_stb_mmu(env, vaddr + i, byte, oi, ra);
 183            }
 184        }
 185    }
 186#endif
 187}
 188
 189static void access_memset(CPUS390XState *env, S390Access *desta,
 190                          uint8_t byte, uintptr_t ra)
 191{
 192
 193    do_access_memset(env, desta->vaddr1, desta->haddr1, byte, desta->size1,
 194                     desta->mmu_idx, ra);
 195    if (likely(!desta->size2)) {
 196        return;
 197    }
 198    do_access_memset(env, desta->vaddr2, desta->haddr2, byte, desta->size2,
 199                     desta->mmu_idx, ra);
 200}
 201
 202static uint8_t do_access_get_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
 203                                  int offset, int mmu_idx, uintptr_t ra)
 204{
 205#ifdef CONFIG_USER_ONLY
 206    return ldub_p(*haddr + offset);
 207#else
 208    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
 209    uint8_t byte;
 210
 211    if (likely(*haddr)) {
 212        return ldub_p(*haddr + offset);
 213    }
 214    /*
 215     * Do a single access and test if we can then get access to the
 216     * page. This is especially relevant to speed up TLB_NOTDIRTY.
 217     */
 218    byte = helper_ret_ldub_mmu(env, vaddr + offset, oi, ra);
 219    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_LOAD, mmu_idx);
 220    return byte;
 221#endif
 222}
 223
 224static uint8_t access_get_byte(CPUS390XState *env, S390Access *access,
 225                               int offset, uintptr_t ra)
 226{
 227    if (offset < access->size1) {
 228        return do_access_get_byte(env, access->vaddr1, &access->haddr1,
 229                                  offset, access->mmu_idx, ra);
 230    }
 231    return do_access_get_byte(env, access->vaddr2, &access->haddr2,
 232                              offset - access->size1, access->mmu_idx, ra);
 233}
 234
 235static void do_access_set_byte(CPUS390XState *env, vaddr vaddr, char **haddr,
 236                               int offset, uint8_t byte, int mmu_idx,
 237                               uintptr_t ra)
 238{
 239#ifdef CONFIG_USER_ONLY
 240    stb_p(*haddr + offset, byte);
 241#else
 242    TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
 243
 244    if (likely(*haddr)) {
 245        stb_p(*haddr + offset, byte);
 246        return;
 247    }
 248    /*
 249     * Do a single access and test if we can then get access to the
 250     * page. This is especially relevant to speed up TLB_NOTDIRTY.
 251     */
 252    helper_ret_stb_mmu(env, vaddr + offset, byte, oi, ra);
 253    *haddr = tlb_vaddr_to_host(env, vaddr, MMU_DATA_STORE, mmu_idx);
 254#endif
 255}
 256
 257static void access_set_byte(CPUS390XState *env, S390Access *access,
 258                            int offset, uint8_t byte, uintptr_t ra)
 259{
 260    if (offset < access->size1) {
 261        do_access_set_byte(env, access->vaddr1, &access->haddr1, offset, byte,
 262                           access->mmu_idx, ra);
 263    } else {
 264        do_access_set_byte(env, access->vaddr2, &access->haddr2,
 265                           offset - access->size1, byte, access->mmu_idx, ra);
 266    }
 267}
 268
 269/*
 270 * Move data with the same semantics as memmove() in case ranges don't overlap
 271 * or src > dest. Undefined behavior on destructive overlaps.
 272 */
 273static void access_memmove(CPUS390XState *env, S390Access *desta,
 274                           S390Access *srca, uintptr_t ra)
 275{
 276    int diff;
 277
 278    g_assert(desta->size1 + desta->size2 == srca->size1 + srca->size2);
 279
 280    /* Fallback to slow access in case we don't have access to all host pages */
 281    if (unlikely(!desta->haddr1 || (desta->size2 && !desta->haddr2) ||
 282                 !srca->haddr1 || (srca->size2 && !srca->haddr2))) {
 283        int i;
 284
 285        for (i = 0; i < desta->size1 + desta->size2; i++) {
 286            uint8_t byte = access_get_byte(env, srca, i, ra);
 287
 288            access_set_byte(env, desta, i, byte, ra);
 289        }
 290        return;
 291    }
 292
 293    if (srca->size1 == desta->size1) {
 294        memmove(desta->haddr1, srca->haddr1, srca->size1);
 295        if (unlikely(srca->size2)) {
 296            memmove(desta->haddr2, srca->haddr2, srca->size2);
 297        }
 298    } else if (srca->size1 < desta->size1) {
 299        diff = desta->size1 - srca->size1;
 300        memmove(desta->haddr1, srca->haddr1, srca->size1);
 301        memmove(desta->haddr1 + srca->size1, srca->haddr2, diff);
 302        if (likely(desta->size2)) {
 303            memmove(desta->haddr2, srca->haddr2 + diff, desta->size2);
 304        }
 305    } else {
 306        diff = srca->size1 - desta->size1;
 307        memmove(desta->haddr1, srca->haddr1, desta->size1);
 308        memmove(desta->haddr2, srca->haddr1 + desta->size1, diff);
 309        if (likely(srca->size2)) {
 310            memmove(desta->haddr2 + diff, srca->haddr2, srca->size2);
 311        }
 312    }
 313}
 314
 315static int mmu_idx_from_as(uint8_t as)
 316{
 317    switch (as) {
 318    case AS_PRIMARY:
 319        return MMU_PRIMARY_IDX;
 320    case AS_SECONDARY:
 321        return MMU_SECONDARY_IDX;
 322    case AS_HOME:
 323        return MMU_HOME_IDX;
 324    default:
 325        /* FIXME AS_ACCREG */
 326        g_assert_not_reached();
 327    }
 328}
 329
 330/* and on array */
 331static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
 332                             uint64_t src, uintptr_t ra)
 333{
 334    const int mmu_idx = cpu_mmu_index(env, false);
 335    S390Access srca1, srca2, desta;
 336    uint32_t i;
 337    uint8_t c = 0;
 338
 339    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
 340               __func__, l, dest, src);
 341
 342    /* NC always processes one more byte than specified - maximum is 256 */
 343    l++;
 344
 345    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
 346    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
 347    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 348    for (i = 0; i < l; i++) {
 349        const uint8_t x = access_get_byte(env, &srca1, i, ra) &
 350                          access_get_byte(env, &srca2, i, ra);
 351
 352        c |= x;
 353        access_set_byte(env, &desta, i, x, ra);
 354    }
 355    return c != 0;
 356}
 357
 358uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
 359                    uint64_t src)
 360{
 361    return do_helper_nc(env, l, dest, src, GETPC());
 362}
 363
 364/* xor on array */
 365static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
 366                             uint64_t src, uintptr_t ra)
 367{
 368    const int mmu_idx = cpu_mmu_index(env, false);
 369    S390Access srca1, srca2, desta;
 370    uint32_t i;
 371    uint8_t c = 0;
 372
 373    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
 374               __func__, l, dest, src);
 375
 376    /* XC always processes one more byte than specified - maximum is 256 */
 377    l++;
 378
 379    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
 380    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
 381    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 382
 383    /* xor with itself is the same as memset(0) */
 384    if (src == dest) {
 385        access_memset(env, &desta, 0, ra);
 386        return 0;
 387    }
 388
 389    for (i = 0; i < l; i++) {
 390        const uint8_t x = access_get_byte(env, &srca1, i, ra) ^
 391                          access_get_byte(env, &srca2, i, ra);
 392
 393        c |= x;
 394        access_set_byte(env, &desta, i, x, ra);
 395    }
 396    return c != 0;
 397}
 398
 399uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
 400                    uint64_t src)
 401{
 402    return do_helper_xc(env, l, dest, src, GETPC());
 403}
 404
 405/* or on array */
 406static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
 407                             uint64_t src, uintptr_t ra)
 408{
 409    const int mmu_idx = cpu_mmu_index(env, false);
 410    S390Access srca1, srca2, desta;
 411    uint32_t i;
 412    uint8_t c = 0;
 413
 414    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
 415               __func__, l, dest, src);
 416
 417    /* OC always processes one more byte than specified - maximum is 256 */
 418    l++;
 419
 420    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
 421    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
 422    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 423    for (i = 0; i < l; i++) {
 424        const uint8_t x = access_get_byte(env, &srca1, i, ra) |
 425                          access_get_byte(env, &srca2, i, ra);
 426
 427        c |= x;
 428        access_set_byte(env, &desta, i, x, ra);
 429    }
 430    return c != 0;
 431}
 432
 433uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
 434                    uint64_t src)
 435{
 436    return do_helper_oc(env, l, dest, src, GETPC());
 437}
 438
 439/* memmove */
 440static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
 441                              uint64_t src, uintptr_t ra)
 442{
 443    const int mmu_idx = cpu_mmu_index(env, false);
 444    S390Access srca, desta;
 445    uint32_t i;
 446
 447    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
 448               __func__, l, dest, src);
 449
 450    /* MVC always copies one more byte than specified - maximum is 256 */
 451    l++;
 452
 453    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
 454    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 455
 456    /*
 457     * "When the operands overlap, the result is obtained as if the operands
 458     * were processed one byte at a time". Only non-destructive overlaps
 459     * behave like memmove().
 460     */
 461    if (dest == src + 1) {
 462        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
 463    } else if (!is_destructive_overlap(env, dest, src, l)) {
 464        access_memmove(env, &desta, &srca, ra);
 465    } else {
 466        for (i = 0; i < l; i++) {
 467            uint8_t byte = access_get_byte(env, &srca, i, ra);
 468
 469            access_set_byte(env, &desta, i, byte, ra);
 470        }
 471    }
 472
 473    return env->cc_op;
 474}
 475
 476void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 477{
 478    do_helper_mvc(env, l, dest, src, GETPC());
 479}
 480
 481/* move inverse  */
 482void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 483{
 484    const int mmu_idx = cpu_mmu_index(env, false);
 485    S390Access srca, desta;
 486    uintptr_t ra = GETPC();
 487    int i;
 488
 489    /* MVCIN always copies one more byte than specified - maximum is 256 */
 490    l++;
 491
 492    src = wrap_address(env, src - l + 1);
 493    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
 494    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 495    for (i = 0; i < l; i++) {
 496        const uint8_t x = access_get_byte(env, &srca, l - i - 1, ra);
 497
 498        access_set_byte(env, &desta, i, x, ra);
 499    }
 500}
 501
 502/* move numerics  */
 503void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 504{
 505    const int mmu_idx = cpu_mmu_index(env, false);
 506    S390Access srca1, srca2, desta;
 507    uintptr_t ra = GETPC();
 508    int i;
 509
 510    /* MVN always copies one more byte than specified - maximum is 256 */
 511    l++;
 512
 513    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
 514    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
 515    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 516    for (i = 0; i < l; i++) {
 517        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0x0f) |
 518                          (access_get_byte(env, &srca2, i, ra) & 0xf0);
 519
 520        access_set_byte(env, &desta, i, x, ra);
 521    }
 522}
 523
 524/* move with offset  */
 525void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 526{
 527    const int mmu_idx = cpu_mmu_index(env, false);
 528    /* MVO always processes one more byte than specified - maximum is 16 */
 529    const int len_dest = (l >> 4) + 1;
 530    const int len_src = (l & 0xf) + 1;
 531    uintptr_t ra = GETPC();
 532    uint8_t byte_dest, byte_src;
 533    S390Access srca, desta;
 534    int i, j;
 535
 536    srca = access_prepare(env, src, len_src, MMU_DATA_LOAD, mmu_idx, ra);
 537    desta = access_prepare(env, dest, len_dest, MMU_DATA_STORE, mmu_idx, ra);
 538
 539    /* Handle rightmost byte */
 540    byte_dest = cpu_ldub_data_ra(env, dest + len_dest - 1, ra);
 541    byte_src = access_get_byte(env, &srca, len_src - 1, ra);
 542    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
 543    access_set_byte(env, &desta, len_dest - 1, byte_dest, ra);
 544
 545    /* Process remaining bytes from right to left */
 546    for (i = len_dest - 2, j = len_src - 2; i >= 0; i--, j--) {
 547        byte_dest = byte_src >> 4;
 548        if (j >= 0) {
 549            byte_src = access_get_byte(env, &srca, j, ra);
 550        } else {
 551            byte_src = 0;
 552        }
 553        byte_dest |= byte_src << 4;
 554        access_set_byte(env, &desta, i, byte_dest, ra);
 555    }
 556}
 557
 558/* move zones  */
 559void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 560{
 561    const int mmu_idx = cpu_mmu_index(env, false);
 562    S390Access srca1, srca2, desta;
 563    uintptr_t ra = GETPC();
 564    int i;
 565
 566    /* MVZ always copies one more byte than specified - maximum is 256 */
 567    l++;
 568
 569    srca1 = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
 570    srca2 = access_prepare(env, dest, l, MMU_DATA_LOAD, mmu_idx, ra);
 571    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
 572    for (i = 0; i < l; i++) {
 573        const uint8_t x = (access_get_byte(env, &srca1, i, ra) & 0xf0) |
 574                          (access_get_byte(env, &srca2, i, ra) & 0x0f);
 575
 576        access_set_byte(env, &desta, i, x, ra);
 577    }
 578}
 579
 580/* compare unsigned byte arrays */
 581static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
 582                              uint64_t s2, uintptr_t ra)
 583{
 584    uint32_t i;
 585    uint32_t cc = 0;
 586
 587    HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
 588               __func__, l, s1, s2);
 589
 590    for (i = 0; i <= l; i++) {
 591        uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
 592        uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
 593        HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
 594        if (x < y) {
 595            cc = 1;
 596            break;
 597        } else if (x > y) {
 598            cc = 2;
 599            break;
 600        }
 601    }
 602
 603    HELPER_LOG("\n");
 604    return cc;
 605}
 606
 607uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
 608{
 609    return do_helper_clc(env, l, s1, s2, GETPC());
 610}
 611
 612/* compare logical under mask */
 613uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
 614                     uint64_t addr)
 615{
 616    uintptr_t ra = GETPC();
 617    uint32_t cc = 0;
 618
 619    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
 620               mask, addr);
 621
 622    while (mask) {
 623        if (mask & 8) {
 624            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
 625            uint8_t r = extract32(r1, 24, 8);
 626            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
 627                       addr);
 628            if (r < d) {
 629                cc = 1;
 630                break;
 631            } else if (r > d) {
 632                cc = 2;
 633                break;
 634            }
 635            addr++;
 636        }
 637        mask = (mask << 1) & 0xf;
 638        r1 <<= 8;
 639    }
 640
 641    HELPER_LOG("\n");
 642    return cc;
 643}
 644
 645static inline uint64_t get_address(CPUS390XState *env, int reg)
 646{
 647    return wrap_address(env, env->regs[reg]);
 648}
 649
 650/*
 651 * Store the address to the given register, zeroing out unused leftmost
 652 * bits in bit positions 32-63 (24-bit and 31-bit mode only).
 653 */
 654static inline void set_address_zero(CPUS390XState *env, int reg,
 655                                    uint64_t address)
 656{
 657    if (env->psw.mask & PSW_MASK_64) {
 658        env->regs[reg] = address;
 659    } else {
 660        if (!(env->psw.mask & PSW_MASK_32)) {
 661            address &= 0x00ffffff;
 662        } else {
 663            address &= 0x7fffffff;
 664        }
 665        env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
 666    }
 667}
 668
 669static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
 670{
 671    if (env->psw.mask & PSW_MASK_64) {
 672        /* 64-Bit mode */
 673        env->regs[reg] = address;
 674    } else {
 675        if (!(env->psw.mask & PSW_MASK_32)) {
 676            /* 24-Bit mode. According to the PoO it is implementation
 677            dependent if bits 32-39 remain unchanged or are set to
 678            zeros.  Choose the former so that the function can also be
 679            used for TRT.  */
 680            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
 681        } else {
 682            /* 31-Bit mode. According to the PoO it is implementation
 683            dependent if bit 32 remains unchanged or is set to zero.
 684            Choose the latter so that the function can also be used for
 685            TRT.  */
 686            address &= 0x7fffffff;
 687            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
 688        }
 689    }
 690}
 691
 692static inline uint64_t wrap_length32(CPUS390XState *env, uint64_t length)
 693{
 694    if (!(env->psw.mask & PSW_MASK_64)) {
 695        return (uint32_t)length;
 696    }
 697    return length;
 698}
 699
 700static inline uint64_t wrap_length31(CPUS390XState *env, uint64_t length)
 701{
 702    if (!(env->psw.mask & PSW_MASK_64)) {
 703        /* 24-Bit and 31-Bit mode */
 704        length &= 0x7fffffff;
 705    }
 706    return length;
 707}
 708
 709static inline uint64_t get_length(CPUS390XState *env, int reg)
 710{
 711    return wrap_length31(env, env->regs[reg]);
 712}
 713
 714static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
 715{
 716    if (env->psw.mask & PSW_MASK_64) {
 717        /* 64-Bit mode */
 718        env->regs[reg] = length;
 719    } else {
 720        /* 24-Bit and 31-Bit mode */
 721        env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
 722    }
 723}
 724
 725/* search string (c is byte to search, r2 is string, r1 end of string) */
 726void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
 727{
 728    uintptr_t ra = GETPC();
 729    uint64_t end, str;
 730    uint32_t len;
 731    uint8_t v, c = env->regs[0];
 732
 733    /* Bits 32-55 must contain all 0.  */
 734    if (env->regs[0] & 0xffffff00u) {
 735        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
 736    }
 737
 738    str = get_address(env, r2);
 739    end = get_address(env, r1);
 740
 741    /* Lest we fail to service interrupts in a timely manner, limit the
 742       amount of work we're willing to do.  For now, let's cap at 8k.  */
 743    for (len = 0; len < 0x2000; ++len) {
 744        if (str + len == end) {
 745            /* Character not found.  R1 & R2 are unmodified.  */
 746            env->cc_op = 2;
 747            return;
 748        }
 749        v = cpu_ldub_data_ra(env, str + len, ra);
 750        if (v == c) {
 751            /* Character found.  Set R1 to the location; R2 is unmodified.  */
 752            env->cc_op = 1;
 753            set_address(env, r1, str + len);
 754            return;
 755        }
 756    }
 757
 758    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
 759    env->cc_op = 3;
 760    set_address(env, r2, str + len);
 761}
 762
 763void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
 764{
 765    uintptr_t ra = GETPC();
 766    uint32_t len;
 767    uint16_t v, c = env->regs[0];
 768    uint64_t end, str, adj_end;
 769
 770    /* Bits 32-47 of R0 must be zero.  */
 771    if (env->regs[0] & 0xffff0000u) {
 772        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
 773    }
 774
 775    str = get_address(env, r2);
 776    end = get_address(env, r1);
 777
 778    /* If the LSB of the two addresses differ, use one extra byte.  */
 779    adj_end = end + ((str ^ end) & 1);
 780
 781    /* Lest we fail to service interrupts in a timely manner, limit the
 782       amount of work we're willing to do.  For now, let's cap at 8k.  */
 783    for (len = 0; len < 0x2000; len += 2) {
 784        if (str + len == adj_end) {
 785            /* End of input found.  */
 786            env->cc_op = 2;
 787            return;
 788        }
 789        v = cpu_lduw_data_ra(env, str + len, ra);
 790        if (v == c) {
 791            /* Character found.  Set R1 to the location; R2 is unmodified.  */
 792            env->cc_op = 1;
 793            set_address(env, r1, str + len);
 794            return;
 795        }
 796    }
 797
 798    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
 799    env->cc_op = 3;
 800    set_address(env, r2, str + len);
 801}
 802
 803/* unsigned string compare (c is string terminator) */
 804uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
 805{
 806    uintptr_t ra = GETPC();
 807    uint32_t len;
 808
 809    c = c & 0xff;
 810    s1 = wrap_address(env, s1);
 811    s2 = wrap_address(env, s2);
 812
 813    /* Lest we fail to service interrupts in a timely manner, limit the
 814       amount of work we're willing to do.  For now, let's cap at 8k.  */
 815    for (len = 0; len < 0x2000; ++len) {
 816        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
 817        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
 818        if (v1 == v2) {
 819            if (v1 == c) {
 820                /* Equal.  CC=0, and don't advance the registers.  */
 821                env->cc_op = 0;
 822                env->retxl = s2;
 823                return s1;
 824            }
 825        } else {
 826            /* Unequal.  CC={1,2}, and advance the registers.  Note that
 827               the terminator need not be zero, but the string that contains
 828               the terminator is by definition "low".  */
 829            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
 830            env->retxl = s2 + len;
 831            return s1 + len;
 832        }
 833    }
 834
 835    /* CPU-determined bytes equal; advance the registers.  */
 836    env->cc_op = 3;
 837    env->retxl = s2 + len;
 838    return s1 + len;
 839}
 840
 841/* move page */
 842uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2)
 843{
 844    const int mmu_idx = cpu_mmu_index(env, false);
 845    const bool f = extract64(r0, 11, 1);
 846    const bool s = extract64(r0, 10, 1);
 847    uintptr_t ra = GETPC();
 848    S390Access srca, desta;
 849
 850    if ((f && s) || extract64(r0, 12, 4)) {
 851        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, GETPC());
 852    }
 853
 854    r1 = wrap_address(env, r1 & TARGET_PAGE_MASK);
 855    r2 = wrap_address(env, r2 & TARGET_PAGE_MASK);
 856
 857    /*
 858     * TODO:
 859     * - Access key handling
 860     * - CC-option with surpression of page-translation exceptions
 861     * - Store r1/r2 register identifiers at real location 162
 862     */
 863    srca = access_prepare(env, r2, TARGET_PAGE_SIZE, MMU_DATA_LOAD, mmu_idx,
 864                          ra);
 865    desta = access_prepare(env, r1, TARGET_PAGE_SIZE, MMU_DATA_STORE, mmu_idx,
 866                           ra);
 867    access_memmove(env, &desta, &srca, ra);
 868    return 0; /* data moved */
 869}
 870
 871/* string copy */
 872uint32_t HELPER(mvst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
 873{
 874    const int mmu_idx = cpu_mmu_index(env, false);
 875    const uint64_t d = get_address(env, r1);
 876    const uint64_t s = get_address(env, r2);
 877    const uint8_t c = env->regs[0];
 878    const int len = MIN(-(d | TARGET_PAGE_MASK), -(s | TARGET_PAGE_MASK));
 879    S390Access srca, desta;
 880    uintptr_t ra = GETPC();
 881    int i;
 882
 883    if (env->regs[0] & 0xffffff00ull) {
 884        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
 885    }
 886
 887    /*
 888     * Our access should not exceed single pages, as we must not report access
 889     * exceptions exceeding the actually copied range (which we don't know at
 890     * this point). We might over-indicate watchpoints within the pages
 891     * (if we ever care, we have to limit processing to a single byte).
 892     */
 893    srca = access_prepare(env, s, len, MMU_DATA_LOAD, mmu_idx, ra);
 894    desta = access_prepare(env, d, len, MMU_DATA_STORE, mmu_idx, ra);
 895    for (i = 0; i < len; i++) {
 896        const uint8_t v = access_get_byte(env, &srca, i, ra);
 897
 898        access_set_byte(env, &desta, i, v, ra);
 899        if (v == c) {
 900            set_address_zero(env, r1, d + i);
 901            return 1;
 902        }
 903    }
 904    set_address_zero(env, r1, d + len);
 905    set_address_zero(env, r2, s + len);
 906    return 3;
 907}
 908
 909/* load access registers r1 to r3 from memory at a2 */
 910void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
 911{
 912    uintptr_t ra = GETPC();
 913    int i;
 914
 915    if (a2 & 0x3) {
 916        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
 917    }
 918
 919    for (i = r1;; i = (i + 1) % 16) {
 920        env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
 921        a2 += 4;
 922
 923        if (i == r3) {
 924            break;
 925        }
 926    }
 927}
 928
 929/* store access registers r1 to r3 in memory at a2 */
 930void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
 931{
 932    uintptr_t ra = GETPC();
 933    int i;
 934
 935    if (a2 & 0x3) {
 936        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
 937    }
 938
 939    for (i = r1;; i = (i + 1) % 16) {
 940        cpu_stl_data_ra(env, a2, env->aregs[i], ra);
 941        a2 += 4;
 942
 943        if (i == r3) {
 944            break;
 945        }
 946    }
 947}
 948
 949/* move long helper */
 950static inline uint32_t do_mvcl(CPUS390XState *env,
 951                               uint64_t *dest, uint64_t *destlen,
 952                               uint64_t *src, uint64_t *srclen,
 953                               uint16_t pad, int wordsize, uintptr_t ra)
 954{
 955    const int mmu_idx = cpu_mmu_index(env, false);
 956    int len = MIN(*destlen, -(*dest | TARGET_PAGE_MASK));
 957    S390Access srca, desta;
 958    int i, cc;
 959
 960    if (*destlen == *srclen) {
 961        cc = 0;
 962    } else if (*destlen < *srclen) {
 963        cc = 1;
 964    } else {
 965        cc = 2;
 966    }
 967
 968    if (!*destlen) {
 969        return cc;
 970    }
 971
 972    /*
 973     * Only perform one type of type of operation (move/pad) at a time.
 974     * Stay within single pages.
 975     */
 976    if (*srclen) {
 977        /* Copy the src array */
 978        len = MIN(MIN(*srclen, -(*src | TARGET_PAGE_MASK)), len);
 979        *destlen -= len;
 980        *srclen -= len;
 981        srca = access_prepare(env, *src, len, MMU_DATA_LOAD, mmu_idx, ra);
 982        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
 983        access_memmove(env, &desta, &srca, ra);
 984        *src = wrap_address(env, *src + len);
 985        *dest = wrap_address(env, *dest + len);
 986    } else if (wordsize == 1) {
 987        /* Pad the remaining area */
 988        *destlen -= len;
 989        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
 990        access_memset(env, &desta, pad, ra);
 991        *dest = wrap_address(env, *dest + len);
 992    } else {
 993        desta = access_prepare(env, *dest, len, MMU_DATA_STORE, mmu_idx, ra);
 994
 995        /* The remaining length selects the padding byte. */
 996        for (i = 0; i < len; (*destlen)--, i++) {
 997            if (*destlen & 1) {
 998                access_set_byte(env, &desta, i, pad, ra);
 999            } else {
1000                access_set_byte(env, &desta, i, pad >> 8, ra);
1001            }
1002        }
1003        *dest = wrap_address(env, *dest + len);
1004    }
1005
1006    return *destlen ? 3 : cc;
1007}
1008
/*
 * move long
 *
 * r1 and r2 each name a register pair: the first register holds the operand
 * address, the second one the operand length in its low 24 bits. The pad
 * byte is taken from bits 24-31 of register r2 + 1.
 *
 * Returns 3 (without moving anything) when is_destructive_overlap() reports
 * an overlap; otherwise 0, 1 or 2 for destlen ==, <, > srclen as they were
 * on entry.
 */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    const int mmu_idx = cpu_mmu_index(env, false);
    uintptr_t ra = GETPC();
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    CPUState *cs = env_cpu(env);
    S390Access srca, desta;
    uint32_t cc, cur_len;

    /* The condition code is fixed up front, before any length changes. */
    if (is_destructive_overlap(env, dest, src, MIN(srclen, destlen))) {
        cc = 3;
    } else if (srclen == destlen) {
        cc = 0;
    } else if (destlen < srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* We might have to zero-out some bits even if there was no action. */
    if (unlikely(!destlen || cc == 3)) {
        set_address_zero(env, r2, src);
        set_address_zero(env, r1, dest);
        return cc;
    } else if (!srclen) {
        /* Pure padding: the loop below never touches r2, so fix it up now. */
        set_address_zero(env, r2, src);
    }

    /*
     * Only perform one type of operation (move/pad) in one step.
     * Stay within single pages.
     */
    while (destlen) {
        /* Bound the step by the end of the destination page. */
        cur_len = MIN(destlen, -(dest | TARGET_PAGE_MASK));
        if (!srclen) {
            /* Source exhausted: fill the destination with the pad byte. */
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memset(env, &desta, pad, ra);
        } else {
            /* Also bound the step by the source length and source page. */
            cur_len = MIN(MIN(srclen, -(src | TARGET_PAGE_MASK)), cur_len);

            srca = access_prepare(env, src, cur_len, MMU_DATA_LOAD, mmu_idx,
                                  ra);
            desta = access_prepare(env, dest, cur_len, MMU_DATA_STORE, mmu_idx,
                                   ra);
            access_memmove(env, &desta, &srca, ra);
            src = wrap_address(env, src + cur_len);
            srclen -= cur_len;
            /* Only the low 24 bits (the length) of r2 + 1 are replaced. */
            env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
            set_address_zero(env, r2, src);
        }
        dest = wrap_address(env, dest + cur_len);
        destlen -= cur_len;
        /* Only the low 24 bits (the length) of r1 + 1 are replaced. */
        env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
        set_address_zero(env, r1, dest);

        /*
         * MVCL is interruptible. Return to the main loop if requested after
         * writing back all state to registers. If no interrupt will get
         * injected, we'll end up back in this handler and continue processing
         * the remaining parts.
         */
        if (destlen && unlikely(cpu_loop_exit_requested(cs))) {
            cpu_loop_exit_restore(cs, ra);
        }
    }
    return cc;
}
1082
1083/* move long extended */
1084uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1085                       uint32_t r3)
1086{
1087    uintptr_t ra = GETPC();
1088    uint64_t destlen = get_length(env, r1 + 1);
1089    uint64_t dest = get_address(env, r1);
1090    uint64_t srclen = get_length(env, r3 + 1);
1091    uint64_t src = get_address(env, r3);
1092    uint8_t pad = a2;
1093    uint32_t cc;
1094
1095    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
1096
1097    set_length(env, r1 + 1, destlen);
1098    set_length(env, r3 + 1, srclen);
1099    set_address(env, r1, dest);
1100    set_address(env, r3, src);
1101
1102    return cc;
1103}
1104
1105/* move long unicode */
1106uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1107                       uint32_t r3)
1108{
1109    uintptr_t ra = GETPC();
1110    uint64_t destlen = get_length(env, r1 + 1);
1111    uint64_t dest = get_address(env, r1);
1112    uint64_t srclen = get_length(env, r3 + 1);
1113    uint64_t src = get_address(env, r3);
1114    uint16_t pad = a2;
1115    uint32_t cc;
1116
1117    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
1118
1119    set_length(env, r1 + 1, destlen);
1120    set_length(env, r3 + 1, srclen);
1121    set_address(env, r1, dest);
1122    set_address(env, r3, src);
1123
1124    return cc;
1125}
1126
1127/* compare logical long helper */
1128static inline uint32_t do_clcl(CPUS390XState *env,
1129                               uint64_t *src1, uint64_t *src1len,
1130                               uint64_t *src3, uint64_t *src3len,
1131                               uint16_t pad, uint64_t limit,
1132                               int wordsize, uintptr_t ra)
1133{
1134    uint64_t len = MAX(*src1len, *src3len);
1135    uint32_t cc = 0;
1136
1137    check_alignment(env, *src1len | *src3len, wordsize, ra);
1138
1139    if (!len) {
1140        return cc;
1141    }
1142
1143    /* Lest we fail to service interrupts in a timely manner, limit the
1144       amount of work we're willing to do.  */
1145    if (len > limit) {
1146        len = limit;
1147        cc = 3;
1148    }
1149
1150    for (; len; len -= wordsize) {
1151        uint16_t v1 = pad;
1152        uint16_t v3 = pad;
1153
1154        if (*src1len) {
1155            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
1156        }
1157        if (*src3len) {
1158            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
1159        }
1160
1161        if (v1 != v3) {
1162            cc = (v1 < v3) ? 1 : 2;
1163            break;
1164        }
1165
1166        if (*src1len) {
1167            *src1 += wordsize;
1168            *src1len -= wordsize;
1169        }
1170        if (*src3len) {
1171            *src3 += wordsize;
1172            *src3len -= wordsize;
1173        }
1174    }
1175
1176    return cc;
1177}
1178
1179
1180/* compare logical long */
1181uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
1182{
1183    uintptr_t ra = GETPC();
1184    uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
1185    uint64_t src1 = get_address(env, r1);
1186    uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
1187    uint64_t src3 = get_address(env, r2);
1188    uint8_t pad = env->regs[r2 + 1] >> 24;
1189    uint32_t cc;
1190
1191    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
1192
1193    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
1194    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
1195    set_address(env, r1, src1);
1196    set_address(env, r2, src3);
1197
1198    return cc;
1199}
1200
1201/* compare logical long extended memcompare insn with padding */
1202uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1203                       uint32_t r3)
1204{
1205    uintptr_t ra = GETPC();
1206    uint64_t src1len = get_length(env, r1 + 1);
1207    uint64_t src1 = get_address(env, r1);
1208    uint64_t src3len = get_length(env, r3 + 1);
1209    uint64_t src3 = get_address(env, r3);
1210    uint8_t pad = a2;
1211    uint32_t cc;
1212
1213    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
1214
1215    set_length(env, r1 + 1, src1len);
1216    set_length(env, r3 + 1, src3len);
1217    set_address(env, r1, src1);
1218    set_address(env, r3, src3);
1219
1220    return cc;
1221}
1222
1223/* compare logical long unicode memcompare insn with padding */
1224uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
1225                       uint32_t r3)
1226{
1227    uintptr_t ra = GETPC();
1228    uint64_t src1len = get_length(env, r1 + 1);
1229    uint64_t src1 = get_address(env, r1);
1230    uint64_t src3len = get_length(env, r3 + 1);
1231    uint64_t src3 = get_address(env, r3);
1232    uint16_t pad = a2;
1233    uint32_t cc = 0;
1234
1235    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
1236
1237    set_length(env, r1 + 1, src1len);
1238    set_length(env, r3 + 1, src3len);
1239    set_address(env, r1, src1);
1240    set_address(env, r3, src3);
1241
1242    return cc;
1243}
1244
1245/* checksum */
1246uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
1247                      uint64_t src, uint64_t src_len)
1248{
1249    uintptr_t ra = GETPC();
1250    uint64_t max_len, len;
1251    uint64_t cksm = (uint32_t)r1;
1252
1253    /* Lest we fail to service interrupts in a timely manner, limit the
1254       amount of work we're willing to do.  For now, let's cap at 8k.  */
1255    max_len = (src_len > 0x2000 ? 0x2000 : src_len);
1256
1257    /* Process full words as available.  */
1258    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
1259        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
1260    }
1261
1262    switch (max_len - len) {
1263    case 1:
1264        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
1265        len += 1;
1266        break;
1267    case 2:
1268        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1269        len += 2;
1270        break;
1271    case 3:
1272        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
1273        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
1274        len += 3;
1275        break;
1276    }
1277
1278    /* Fold the carry from the checksum.  Note that we can see carry-out
1279       during folding more than once (but probably not more than twice).  */
1280    while (cksm > 0xffffffffull) {
1281        cksm = (uint32_t)cksm + (cksm >> 32);
1282    }
1283
1284    /* Indicate whether or not we've processed everything.  */
1285    env->cc_op = (len == src_len ? 0 : 3);
1286
1287    /* Return both cksm and processed length.  */
1288    env->retxl = cksm;
1289    return len;
1290}
1291
1292void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
1293{
1294    uintptr_t ra = GETPC();
1295    int len_dest = len >> 4;
1296    int len_src = len & 0xf;
1297    uint8_t b;
1298
1299    dest += len_dest;
1300    src += len_src;
1301
1302    /* last byte is special, it only flips the nibbles */
1303    b = cpu_ldub_data_ra(env, src, ra);
1304    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1305    src--;
1306    len_src--;
1307
1308    /* now pack every value */
1309    while (len_dest > 0) {
1310        b = 0;
1311
1312        if (len_src >= 0) {
1313            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1314            src--;
1315            len_src--;
1316        }
1317        if (len_src >= 0) {
1318            b |= cpu_ldub_data_ra(env, src, ra) << 4;
1319            src--;
1320            len_src--;
1321        }
1322
1323        len_dest--;
1324        dest--;
1325        cpu_stb_data_ra(env, dest, b, ra);
1326    }
1327}
1328
1329static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
1330                           uint32_t srclen, int ssize, uintptr_t ra)
1331{
1332    int i;
1333    /* The destination operand is always 16 bytes long.  */
1334    const int destlen = 16;
1335
1336    /* The operands are processed from right to left.  */
1337    src += srclen - 1;
1338    dest += destlen - 1;
1339
1340    for (i = 0; i < destlen; i++) {
1341        uint8_t b = 0;
1342
1343        /* Start with a positive sign */
1344        if (i == 0) {
1345            b = 0xc;
1346        } else if (srclen > ssize) {
1347            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
1348            src -= ssize;
1349            srclen -= ssize;
1350        }
1351
1352        if (srclen > ssize) {
1353            b |= cpu_ldub_data_ra(env, src, ra) << 4;
1354            src -= ssize;
1355            srclen -= ssize;
1356        }
1357
1358        cpu_stb_data_ra(env, dest, b, ra);
1359        dest--;
1360    }
1361}
1362
1363
1364void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
1365                 uint32_t srclen)
1366{
1367    do_pkau(env, dest, src, srclen, 1, GETPC());
1368}
1369
1370void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
1371                 uint32_t srclen)
1372{
1373    do_pkau(env, dest, src, srclen, 2, GETPC());
1374}
1375
1376void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
1377                  uint64_t src)
1378{
1379    uintptr_t ra = GETPC();
1380    int len_dest = len >> 4;
1381    int len_src = len & 0xf;
1382    uint8_t b;
1383    int second_nibble = 0;
1384
1385    dest += len_dest;
1386    src += len_src;
1387
1388    /* last byte is special, it only flips the nibbles */
1389    b = cpu_ldub_data_ra(env, src, ra);
1390    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
1391    src--;
1392    len_src--;
1393
1394    /* now pad every nibble with 0xf0 */
1395
1396    while (len_dest > 0) {
1397        uint8_t cur_byte = 0;
1398
1399        if (len_src > 0) {
1400            cur_byte = cpu_ldub_data_ra(env, src, ra);
1401        }
1402
1403        len_dest--;
1404        dest--;
1405
1406        /* only advance one nibble at a time */
1407        if (second_nibble) {
1408            cur_byte >>= 4;
1409            len_src--;
1410            src--;
1411        }
1412        second_nibble = !second_nibble;
1413
1414        /* digit */
1415        cur_byte = (cur_byte & 0xf);
1416        /* zone bits */
1417        cur_byte |= 0xf0;
1418
1419        cpu_stb_data_ra(env, dest, cur_byte, ra);
1420    }
1421}
1422
1423static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
1424                                 uint32_t destlen, int dsize, uint64_t src,
1425                                 uintptr_t ra)
1426{
1427    int i;
1428    uint32_t cc;
1429    uint8_t b;
1430    /* The source operand is always 16 bytes long.  */
1431    const int srclen = 16;
1432
1433    /* The operands are processed from right to left.  */
1434    src += srclen - 1;
1435    dest += destlen - dsize;
1436
1437    /* Check for the sign.  */
1438    b = cpu_ldub_data_ra(env, src, ra);
1439    src--;
1440    switch (b & 0xf) {
1441    case 0xa:
1442    case 0xc:
1443    case 0xe ... 0xf:
1444        cc = 0;  /* plus */
1445        break;
1446    case 0xb:
1447    case 0xd:
1448        cc = 1;  /* minus */
1449        break;
1450    default:
1451    case 0x0 ... 0x9:
1452        cc = 3;  /* invalid */
1453        break;
1454    }
1455
1456    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
1457    for (i = 0; i < destlen; i += dsize) {
1458        if (i == (31 * dsize)) {
1459            /* If length is 32/64 bytes, the leftmost byte is 0. */
1460            b = 0;
1461        } else if (i % (2 * dsize)) {
1462            b = cpu_ldub_data_ra(env, src, ra);
1463            src--;
1464        } else {
1465            b >>= 4;
1466        }
1467        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
1468        dest -= dsize;
1469    }
1470
1471    return cc;
1472}
1473
1474uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1475                       uint64_t src)
1476{
1477    return do_unpkau(env, dest, destlen, 1, src, GETPC());
1478}
1479
1480uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
1481                       uint64_t src)
1482{
1483    return do_unpkau(env, dest, destlen, 2, src, GETPC());
1484}
1485
1486uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1487{
1488    uintptr_t ra = GETPC();
1489    uint32_t cc = 0;
1490    int i;
1491
1492    for (i = 0; i < destlen; i++) {
1493        uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1494        /* digit */
1495        cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1496
1497        if (i == (destlen - 1)) {
1498            /* sign */
1499            cc |= (b & 0xf) < 0xa ? 1 : 0;
1500        } else {
1501            /* digit */
1502            cc |= (b & 0xf) > 0x9 ? 2 : 0;
1503        }
1504    }
1505
1506    return cc;
1507}
1508
1509static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1510                             uint64_t trans, uintptr_t ra)
1511{
1512    uint32_t i;
1513
1514    for (i = 0; i <= len; i++) {
1515        uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1516        uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1517        cpu_stb_data_ra(env, array + i, new_byte, ra);
1518    }
1519
1520    return env->cc_op;
1521}
1522
1523void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
1524                uint64_t trans)
1525{
1526    do_helper_tr(env, len, array, trans, GETPC());
1527}
1528
1529uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
1530                     uint64_t len, uint64_t trans)
1531{
1532    uintptr_t ra = GETPC();
1533    uint8_t end = env->regs[0] & 0xff;
1534    uint64_t l = len;
1535    uint64_t i;
1536    uint32_t cc = 0;
1537
1538    if (!(env->psw.mask & PSW_MASK_64)) {
1539        array &= 0x7fffffff;
1540        l = (uint32_t)l;
1541    }
1542
1543    /* Lest we fail to service interrupts in a timely manner, limit the
1544       amount of work we're willing to do.  For now, let's cap at 8k.  */
1545    if (l > 0x2000) {
1546        l = 0x2000;
1547        cc = 3;
1548    }
1549
1550    for (i = 0; i < l; i++) {
1551        uint8_t byte, new_byte;
1552
1553        byte = cpu_ldub_data_ra(env, array + i, ra);
1554
1555        if (byte == end) {
1556            cc = 1;
1557            break;
1558        }
1559
1560        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1561        cpu_stb_data_ra(env, array + i, new_byte, ra);
1562    }
1563
1564    env->cc_op = cc;
1565    env->retxl = len - i;
1566    return array + i;
1567}
1568
1569static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1570                                     uint64_t array, uint64_t trans,
1571                                     int inc, uintptr_t ra)
1572{
1573    int i;
1574
1575    for (i = 0; i <= len; i++) {
1576        uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1577        uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1578
1579        if (sbyte != 0) {
1580            set_address(env, 1, array + i * inc);
1581            env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1582            return (i == len) ? 2 : 1;
1583        }
1584    }
1585
1586    return 0;
1587}
1588
1589static uint32_t do_helper_trt_fwd(CPUS390XState *env, uint32_t len,
1590                                  uint64_t array, uint64_t trans,
1591                                  uintptr_t ra)
1592{
1593    return do_helper_trt(env, len, array, trans, 1, ra);
1594}
1595
1596uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
1597                     uint64_t trans)
1598{
1599    return do_helper_trt(env, len, array, trans, 1, GETPC());
1600}
1601
1602static uint32_t do_helper_trt_bkwd(CPUS390XState *env, uint32_t len,
1603                                   uint64_t array, uint64_t trans,
1604                                   uintptr_t ra)
1605{
1606    return do_helper_trt(env, len, array, trans, -1, ra);
1607}
1608
1609uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
1610                      uint64_t trans)
1611{
1612    return do_helper_trt(env, len, array, trans, -1, GETPC());
1613}
1614
1615/* Translate one/two to one/two */
1616uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
1617                      uint32_t tst, uint32_t sizes)
1618{
1619    uintptr_t ra = GETPC();
1620    int dsize = (sizes & 1) ? 1 : 2;
1621    int ssize = (sizes & 2) ? 1 : 2;
1622    uint64_t tbl = get_address(env, 1);
1623    uint64_t dst = get_address(env, r1);
1624    uint64_t len = get_length(env, r1 + 1);
1625    uint64_t src = get_address(env, r2);
1626    uint32_t cc = 3;
1627    int i;
1628
1629    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
1630       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
1631       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
1632    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
1633        tbl &= -4096;
1634    } else {
1635        tbl &= -8;
1636    }
1637
1638    check_alignment(env, len, ssize, ra);
1639
1640    /* Lest we fail to service interrupts in a timely manner, */
1641    /* limit the amount of work we're willing to do.   */
1642    for (i = 0; i < 0x2000; i++) {
1643        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
1644        uint64_t tble = tbl + (sval * dsize);
1645        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
1646        if (dval == tst) {
1647            cc = 1;
1648            break;
1649        }
1650        cpu_stsize_data_ra(env, dst, dval, dsize, ra);
1651
1652        len -= ssize;
1653        src += ssize;
1654        dst += dsize;
1655
1656        if (len == 0) {
1657            cc = 0;
1658            break;
1659        }
1660    }
1661
1662    set_address(env, r1, dst);
1663    set_length(env, r1 + 1, len);
1664    set_address(env, r2, src);
1665
1666    return cc;
1667}
1668
1669void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
1670                  uint32_t r1, uint32_t r3)
1671{
1672    uintptr_t ra = GETPC();
1673    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1674    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1675    Int128 oldv;
1676    uint64_t oldh, oldl;
1677    bool fail;
1678
1679    check_alignment(env, addr, 16, ra);
1680
1681    oldh = cpu_ldq_data_ra(env, addr + 0, ra);
1682    oldl = cpu_ldq_data_ra(env, addr + 8, ra);
1683
1684    oldv = int128_make128(oldl, oldh);
1685    fail = !int128_eq(oldv, cmpv);
1686    if (fail) {
1687        newv = oldv;
1688    }
1689
1690    cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
1691    cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
1692
1693    env->cc_op = fail;
1694    env->regs[r1] = int128_gethi(oldv);
1695    env->regs[r1 + 1] = int128_getlo(oldv);
1696}
1697
1698void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
1699                           uint32_t r1, uint32_t r3)
1700{
1701    uintptr_t ra = GETPC();
1702    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
1703    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1704    int mem_idx;
1705    TCGMemOpIdx oi;
1706    Int128 oldv;
1707    bool fail;
1708
1709    assert(HAVE_CMPXCHG128);
1710
1711    mem_idx = cpu_mmu_index(env, false);
1712    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1713    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
1714    fail = !int128_eq(oldv, cmpv);
1715
1716    env->cc_op = fail;
1717    env->regs[r1] = int128_gethi(oldv);
1718    env->regs[r1 + 1] = int128_getlo(oldv);
1719}
1720
1721static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1722                        uint64_t a2, bool parallel)
1723{
1724    uint32_t mem_idx = cpu_mmu_index(env, false);
1725    uintptr_t ra = GETPC();
1726    uint32_t fc = extract32(env->regs[0], 0, 8);
1727    uint32_t sc = extract32(env->regs[0], 8, 8);
1728    uint64_t pl = get_address(env, 1) & -16;
1729    uint64_t svh, svl;
1730    uint32_t cc;
1731
1732    /* Sanity check the function code and storage characteristic.  */
1733    if (fc > 1 || sc > 3) {
1734        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1735            goto spec_exception;
1736        }
1737        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1738            goto spec_exception;
1739        }
1740    }
1741
1742    /* Sanity check the alignments.  */
1743    if (extract32(a1, 0, fc + 2) || extract32(a2, 0, sc)) {
1744        goto spec_exception;
1745    }
1746
1747    /* Sanity check writability of the store address.  */
1748    probe_write(env, a2, 1 << sc, mem_idx, ra);
1749
1750    /*
1751     * Note that the compare-and-swap is atomic, and the store is atomic,
1752     * but the complete operation is not.  Therefore we do not need to
1753     * assert serial context in order to implement this.  That said,
1754     * restart early if we can't support either operation that is supposed
1755     * to be atomic.
1756     */
1757    if (parallel) {
1758        uint32_t max = 2;
1759#ifdef CONFIG_ATOMIC64
1760        max = 3;
1761#endif
1762        if ((HAVE_CMPXCHG128 ? 0 : fc + 2 > max) ||
1763            (HAVE_ATOMIC128  ? 0 : sc > max)) {
1764            cpu_loop_exit_atomic(env_cpu(env), ra);
1765        }
1766    }
1767
1768    /* All loads happen before all stores.  For simplicity, load the entire
1769       store value area from the parameter list.  */
1770    svh = cpu_ldq_data_ra(env, pl + 16, ra);
1771    svl = cpu_ldq_data_ra(env, pl + 24, ra);
1772
1773    switch (fc) {
1774    case 0:
1775        {
1776            uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1777            uint32_t cv = env->regs[r3];
1778            uint32_t ov;
1779
1780            if (parallel) {
1781#ifdef CONFIG_USER_ONLY
1782                uint32_t *haddr = g2h(a1);
1783                ov = atomic_cmpxchg__nocheck(haddr, cv, nv);
1784#else
1785                TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1786                ov = helper_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1787#endif
1788            } else {
1789                ov = cpu_ldl_data_ra(env, a1, ra);
1790                cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1791            }
1792            cc = (ov != cv);
1793            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1794        }
1795        break;
1796
1797    case 1:
1798        {
1799            uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1800            uint64_t cv = env->regs[r3];
1801            uint64_t ov;
1802
1803            if (parallel) {
1804#ifdef CONFIG_ATOMIC64
1805# ifdef CONFIG_USER_ONLY
1806                uint64_t *haddr = g2h(a1);
1807                ov = atomic_cmpxchg__nocheck(haddr, cv, nv);
1808# else
1809                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
1810                ov = helper_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1811# endif
1812#else
1813                /* Note that we asserted !parallel above.  */
1814                g_assert_not_reached();
1815#endif
1816            } else {
1817                ov = cpu_ldq_data_ra(env, a1, ra);
1818                cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1819            }
1820            cc = (ov != cv);
1821            env->regs[r3] = ov;
1822        }
1823        break;
1824
1825    case 2:
1826        {
1827            uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1828            uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1829            Int128 nv = int128_make128(nvl, nvh);
1830            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1831            Int128 ov;
1832
1833            if (!parallel) {
1834                uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1835                uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1836
1837                ov = int128_make128(ol, oh);
1838                cc = !int128_eq(ov, cv);
1839                if (cc) {
1840                    nv = ov;
1841                }
1842
1843                cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1844                cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1845            } else if (HAVE_CMPXCHG128) {
1846                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1847                ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1848                cc = !int128_eq(ov, cv);
1849            } else {
1850                /* Note that we asserted !parallel above.  */
1851                g_assert_not_reached();
1852            }
1853
1854            env->regs[r3 + 0] = int128_gethi(ov);
1855            env->regs[r3 + 1] = int128_getlo(ov);
1856        }
1857        break;
1858
1859    default:
1860        g_assert_not_reached();
1861    }
1862
1863    /* Store only if the comparison succeeded.  Note that above we use a pair
1864       of 64-bit big-endian loads, so for sc < 3 we must extract the value
1865       from the most-significant bits of svh.  */
1866    if (cc == 0) {
1867        switch (sc) {
1868        case 0:
1869            cpu_stb_data_ra(env, a2, svh >> 56, ra);
1870            break;
1871        case 1:
1872            cpu_stw_data_ra(env, a2, svh >> 48, ra);
1873            break;
1874        case 2:
1875            cpu_stl_data_ra(env, a2, svh >> 32, ra);
1876            break;
1877        case 3:
1878            cpu_stq_data_ra(env, a2, svh, ra);
1879            break;
1880        case 4:
1881            if (!parallel) {
1882                cpu_stq_data_ra(env, a2 + 0, svh, ra);
1883                cpu_stq_data_ra(env, a2 + 8, svl, ra);
1884            } else if (HAVE_ATOMIC128) {
1885                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1886                Int128 sv = int128_make128(svl, svh);
1887                helper_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1888            } else {
1889                /* Note that we asserted !parallel above.  */
1890                g_assert_not_reached();
1891            }
1892            break;
1893        default:
1894            g_assert_not_reached();
1895        }
1896    }
1897
1898    return cc;
1899
1900 spec_exception:
1901    tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1902}
1903
1904uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
1905{
1906    return do_csst(env, r3, a1, a2, false);
1907}
1908
1909uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
1910                               uint64_t a2)
1911{
1912    return do_csst(env, r3, a1, a2, true);
1913}
1914
1915#if !defined(CONFIG_USER_ONLY)
1916void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1917{
1918    uintptr_t ra = GETPC();
1919    bool PERchanged = false;
1920    uint64_t src = a2;
1921    uint32_t i;
1922
1923    if (src & 0x7) {
1924        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1925    }
1926
1927    for (i = r1;; i = (i + 1) % 16) {
1928        uint64_t val = cpu_ldq_data_ra(env, src, ra);
1929        if (env->cregs[i] != val && i >= 9 && i <= 11) {
1930            PERchanged = true;
1931        }
1932        env->cregs[i] = val;
1933        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
1934                   i, src, val);
1935        src += sizeof(uint64_t);
1936
1937        if (i == r3) {
1938            break;
1939        }
1940    }
1941
1942    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1943        s390_cpu_recompute_watchpoints(env_cpu(env));
1944    }
1945
1946    tlb_flush(env_cpu(env));
1947}
1948
1949void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1950{
1951    uintptr_t ra = GETPC();
1952    bool PERchanged = false;
1953    uint64_t src = a2;
1954    uint32_t i;
1955
1956    if (src & 0x3) {
1957        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1958    }
1959
1960    for (i = r1;; i = (i + 1) % 16) {
1961        uint32_t val = cpu_ldl_data_ra(env, src, ra);
1962        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
1963            PERchanged = true;
1964        }
1965        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
1966        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
1967        src += sizeof(uint32_t);
1968
1969        if (i == r3) {
1970            break;
1971        }
1972    }
1973
1974    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
1975        s390_cpu_recompute_watchpoints(env_cpu(env));
1976    }
1977
1978    tlb_flush(env_cpu(env));
1979}
1980
1981void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1982{
1983    uintptr_t ra = GETPC();
1984    uint64_t dest = a2;
1985    uint32_t i;
1986
1987    if (dest & 0x7) {
1988        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
1989    }
1990
1991    for (i = r1;; i = (i + 1) % 16) {
1992        cpu_stq_data_ra(env, dest, env->cregs[i], ra);
1993        dest += sizeof(uint64_t);
1994
1995        if (i == r3) {
1996            break;
1997        }
1998    }
1999}
2000
2001void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
2002{
2003    uintptr_t ra = GETPC();
2004    uint64_t dest = a2;
2005    uint32_t i;
2006
2007    if (dest & 0x3) {
2008        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2009    }
2010
2011    for (i = r1;; i = (i + 1) % 16) {
2012        cpu_stl_data_ra(env, dest, env->cregs[i], ra);
2013        dest += sizeof(uint32_t);
2014
2015        if (i == r3) {
2016            break;
2017        }
2018    }
2019}
2020
2021uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
2022{
2023    uintptr_t ra = GETPC();
2024    int i;
2025
2026    real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;
2027
2028    for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
2029        cpu_stq_mmuidx_ra(env, real_addr + i, 0, MMU_REAL_IDX, ra);
2030    }
2031
2032    return 0;
2033}
2034
2035uint32_t HELPER(tprot)(CPUS390XState *env, uint64_t a1, uint64_t a2)
2036{
2037    S390CPU *cpu = env_archcpu(env);
2038    CPUState *cs = env_cpu(env);
2039
2040    /*
2041     * TODO: we currently don't handle all access protection types
2042     * (including access-list and key-controlled) as well as AR mode.
2043     */
2044    if (!s390_cpu_virt_mem_check_write(cpu, a1, 0, 1)) {
2045        /* Fetching permitted; storing permitted */
2046        return 0;
2047    }
2048
2049    if (env->int_pgm_code == PGM_PROTECTION) {
2050        /* retry if reading is possible */
2051        cs->exception_index = -1;
2052        if (!s390_cpu_virt_mem_check_read(cpu, a1, 0, 1)) {
2053            /* Fetching permitted; storing not permitted */
2054            return 1;
2055        }
2056    }
2057
2058    switch (env->int_pgm_code) {
2059    case PGM_PROTECTION:
2060        /* Fetching not permitted; storing not permitted */
2061        cs->exception_index = -1;
2062        return 2;
2063    case PGM_ADDRESSING:
2064    case PGM_TRANS_SPEC:
2065        /* exceptions forwarded to the guest */
2066        s390_cpu_virt_mem_handle_exc(cpu, GETPC());
2067        return 0;
2068    }
2069
2070    /* Translation not available */
2071    cs->exception_index = -1;
2072    return 3;
2073}
2074
2075/* insert storage key extended */
2076uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
2077{
2078    static S390SKeysState *ss;
2079    static S390SKeysClass *skeyclass;
2080    uint64_t addr = wrap_address(env, r2);
2081    uint8_t key;
2082
2083    if (addr > ram_size) {
2084        return 0;
2085    }
2086
2087    if (unlikely(!ss)) {
2088        ss = s390_get_skeys_device();
2089        skeyclass = S390_SKEYS_GET_CLASS(ss);
2090    }
2091
2092    if (skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key)) {
2093        return 0;
2094    }
2095    return key;
2096}
2097
2098/* set storage key extended */
2099void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
2100{
2101    static S390SKeysState *ss;
2102    static S390SKeysClass *skeyclass;
2103    uint64_t addr = wrap_address(env, r2);
2104    uint8_t key;
2105
2106    if (addr > ram_size) {
2107        return;
2108    }
2109
2110    if (unlikely(!ss)) {
2111        ss = s390_get_skeys_device();
2112        skeyclass = S390_SKEYS_GET_CLASS(ss);
2113    }
2114
2115    key = (uint8_t) r1;
2116    skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
2117   /*
2118    * As we can only flush by virtual address and not all the entries
2119    * that point to a physical address we have to flush the whole TLB.
2120    */
2121    tlb_flush_all_cpus_synced(env_cpu(env));
2122}
2123
2124/* reset reference bit extended */
2125uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
2126{
2127    static S390SKeysState *ss;
2128    static S390SKeysClass *skeyclass;
2129    uint8_t re, key;
2130
2131    if (r2 > ram_size) {
2132        return 0;
2133    }
2134
2135    if (unlikely(!ss)) {
2136        ss = s390_get_skeys_device();
2137        skeyclass = S390_SKEYS_GET_CLASS(ss);
2138    }
2139
2140    if (skeyclass->get_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
2141        return 0;
2142    }
2143
2144    re = key & (SK_R | SK_C);
2145    key &= ~SK_R;
2146
2147    if (skeyclass->set_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
2148        return 0;
2149    }
2150   /*
2151    * As we can only flush by virtual address and not all the entries
2152    * that point to a physical address we have to flush the whole TLB.
2153    */
2154    tlb_flush_all_cpus_synced(env_cpu(env));
2155
2156    /*
2157     * cc
2158     *
2159     * 0  Reference bit zero; change bit zero
2160     * 1  Reference bit zero; change bit one
2161     * 2  Reference bit one; change bit zero
2162     * 3  Reference bit one; change bit one
2163     */
2164
2165    return re >> 1;
2166}
2167
2168uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2169{
2170    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2171    S390Access srca, desta;
2172    uintptr_t ra = GETPC();
2173    int cc = 0;
2174
2175    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2176               __func__, l, a1, a2);
2177
2178    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2179        psw_as == AS_HOME || psw_as == AS_ACCREG) {
2180        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2181    }
2182
2183    l = wrap_length32(env, l);
2184    if (l > 256) {
2185        /* max 256 */
2186        l = 256;
2187        cc = 3;
2188    } else if (!l) {
2189        return cc;
2190    }
2191
2192    /* TODO: Access key handling */
2193    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_PRIMARY_IDX, ra);
2194    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_SECONDARY_IDX, ra);
2195    access_memmove(env, &desta, &srca, ra);
2196    return cc;
2197}
2198
2199uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
2200{
2201    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2202    S390Access srca, desta;
2203    uintptr_t ra = GETPC();
2204    int cc = 0;
2205
2206    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
2207               __func__, l, a1, a2);
2208
2209    if (!(env->psw.mask & PSW_MASK_DAT) || !(env->cregs[0] & CR0_SECONDARY) ||
2210        psw_as == AS_HOME || psw_as == AS_ACCREG) {
2211        s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2212    }
2213
2214    l = wrap_length32(env, l);
2215    if (l > 256) {
2216        /* max 256 */
2217        l = 256;
2218        cc = 3;
2219    } else if (!l) {
2220        return cc;
2221    }
2222
2223    /* TODO: Access key handling */
2224    srca = access_prepare(env, a2, l, MMU_DATA_LOAD, MMU_SECONDARY_IDX, ra);
2225    desta = access_prepare(env, a1, l, MMU_DATA_STORE, MMU_PRIMARY_IDX, ra);
2226    access_memmove(env, &desta, &srca, ra);
2227    return cc;
2228}
2229
2230void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
2231{
2232    CPUState *cs = env_cpu(env);
2233    const uintptr_t ra = GETPC();
2234    uint64_t table, entry, raddr;
2235    uint16_t entries, i, index = 0;
2236
2237    if (r2 & 0xff000) {
2238        tcg_s390_program_interrupt(env, PGM_SPECIFICATION, ra);
2239    }
2240
2241    if (!(r2 & 0x800)) {
2242        /* invalidation-and-clearing operation */
2243        table = r1 & ASCE_ORIGIN;
2244        entries = (r2 & 0x7ff) + 1;
2245
2246        switch (r1 & ASCE_TYPE_MASK) {
2247        case ASCE_TYPE_REGION1:
2248            index = (r2 >> 53) & 0x7ff;
2249            break;
2250        case ASCE_TYPE_REGION2:
2251            index = (r2 >> 42) & 0x7ff;
2252            break;
2253        case ASCE_TYPE_REGION3:
2254            index = (r2 >> 31) & 0x7ff;
2255            break;
2256        case ASCE_TYPE_SEGMENT:
2257            index = (r2 >> 20) & 0x7ff;
2258            break;
2259        }
2260        for (i = 0; i < entries; i++) {
2261            /* addresses are not wrapped in 24/31bit mode but table index is */
2262            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
2263            entry = cpu_ldq_mmuidx_ra(env, raddr, MMU_REAL_IDX, ra);
2264            if (!(entry & REGION_ENTRY_I)) {
2265                /* we are allowed to not store if already invalid */
2266                entry |= REGION_ENTRY_I;
2267                cpu_stq_mmuidx_ra(env, raddr, entry, MMU_REAL_IDX, ra);
2268            }
2269        }
2270    }
2271
2272    /* We simply flush the complete tlb, therefore we can ignore r3. */
2273    if (m4 & 1) {
2274        tlb_flush(cs);
2275    } else {
2276        tlb_flush_all_cpus_synced(cs);
2277    }
2278}
2279
2280/* invalidate pte */
2281void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
2282                  uint32_t m4)
2283{
2284    CPUState *cs = env_cpu(env);
2285    const uintptr_t ra = GETPC();
2286    uint64_t page = vaddr & TARGET_PAGE_MASK;
2287    uint64_t pte_addr, pte;
2288
2289    /* Compute the page table entry address */
2290    pte_addr = (pto & SEGMENT_ENTRY_ORIGIN);
2291    pte_addr += VADDR_PAGE_TX(vaddr) * 8;
2292
2293    /* Mark the page table entry as invalid */
2294    pte = cpu_ldq_mmuidx_ra(env, pte_addr, MMU_REAL_IDX, ra);
2295    pte |= PAGE_ENTRY_I;
2296    cpu_stq_mmuidx_ra(env, pte_addr, pte, MMU_REAL_IDX, ra);
2297
2298    /* XXX we exploit the fact that Linux passes the exact virtual
2299       address here - it's not obliged to! */
2300    if (m4 & 1) {
2301        if (vaddr & ~VADDR_PAGE_TX_MASK) {
2302            tlb_flush_page(cs, page);
2303            /* XXX 31-bit hack */
2304            tlb_flush_page(cs, page ^ 0x80000000);
2305        } else {
2306            /* looks like we don't have a valid virtual address */
2307            tlb_flush(cs);
2308        }
2309    } else {
2310        if (vaddr & ~VADDR_PAGE_TX_MASK) {
2311            tlb_flush_page_all_cpus_synced(cs, page);
2312            /* XXX 31-bit hack */
2313            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
2314        } else {
2315            /* looks like we don't have a valid virtual address */
2316            tlb_flush_all_cpus_synced(cs);
2317        }
2318    }
2319}
2320
2321/* flush local tlb */
2322void HELPER(ptlb)(CPUS390XState *env)
2323{
2324    tlb_flush(env_cpu(env));
2325}
2326
2327/* flush global tlb */
2328void HELPER(purge)(CPUS390XState *env)
2329{
2330    tlb_flush_all_cpus_synced(env_cpu(env));
2331}
2332
2333/* load real address */
2334uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2335{
2336    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2337    uint64_t ret, tec;
2338    int flags, exc, cc;
2339
2340    /* XXX incomplete - has more corner cases */
2341    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2342        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, GETPC());
2343    }
2344
2345    exc = mmu_translate(env, addr, 0, asc, &ret, &flags, &tec);
2346    if (exc) {
2347        cc = 3;
2348        ret = exc | 0x80000000;
2349    } else {
2350        cc = 0;
2351        ret |= addr & ~TARGET_PAGE_MASK;
2352    }
2353
2354    env->cc_op = cc;
2355    return ret;
2356}
2357#endif
2358
2359/* load pair from quadword */
2360uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
2361{
2362    uintptr_t ra = GETPC();
2363    uint64_t hi, lo;
2364
2365    check_alignment(env, addr, 16, ra);
2366    hi = cpu_ldq_data_ra(env, addr + 0, ra);
2367    lo = cpu_ldq_data_ra(env, addr + 8, ra);
2368
2369    env->retxl = lo;
2370    return hi;
2371}
2372
2373uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
2374{
2375    uintptr_t ra = GETPC();
2376    uint64_t hi, lo;
2377    int mem_idx;
2378    TCGMemOpIdx oi;
2379    Int128 v;
2380
2381    assert(HAVE_ATOMIC128);
2382
2383    mem_idx = cpu_mmu_index(env, false);
2384    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2385    v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
2386    hi = int128_gethi(v);
2387    lo = int128_getlo(v);
2388
2389    env->retxl = lo;
2390    return hi;
2391}
2392
2393/* store pair to quadword */
2394void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
2395                  uint64_t low, uint64_t high)
2396{
2397    uintptr_t ra = GETPC();
2398
2399    check_alignment(env, addr, 16, ra);
2400    cpu_stq_data_ra(env, addr + 0, high, ra);
2401    cpu_stq_data_ra(env, addr + 8, low, ra);
2402}
2403
2404void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
2405                           uint64_t low, uint64_t high)
2406{
2407    uintptr_t ra = GETPC();
2408    int mem_idx;
2409    TCGMemOpIdx oi;
2410    Int128 v;
2411
2412    assert(HAVE_ATOMIC128);
2413
2414    mem_idx = cpu_mmu_index(env, false);
2415    oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
2416    v = int128_make128(low, high);
2417    helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
2418}
2419
2420/* Execute instruction.  This instruction executes an insn modified with
2421   the contents of r1.  It does not change the executed instruction in memory;
2422   it does not change the program counter.
2423
2424   Perform this by recording the modified instruction in env->ex_value.
2425   This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
2426*/
2427void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
2428{
2429    uint64_t insn = cpu_lduw_code(env, addr);
2430    uint8_t opc = insn >> 8;
2431
2432    /* Or in the contents of R1[56:63].  */
2433    insn |= r1 & 0xff;
2434
2435    /* Load the rest of the instruction.  */
2436    insn <<= 48;
2437    switch (get_ilen(opc)) {
2438    case 2:
2439        break;
2440    case 4:
2441        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
2442        break;
2443    case 6:
2444        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
2445        break;
2446    default:
2447        g_assert_not_reached();
2448    }
2449
2450    /* The very most common cases can be sped up by avoiding a new TB.  */
2451    if ((opc & 0xf0) == 0xd0) {
2452        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
2453                                      uint64_t, uintptr_t);
2454        static const dx_helper dx[16] = {
2455            [0x0] = do_helper_trt_bkwd,
2456            [0x2] = do_helper_mvc,
2457            [0x4] = do_helper_nc,
2458            [0x5] = do_helper_clc,
2459            [0x6] = do_helper_oc,
2460            [0x7] = do_helper_xc,
2461            [0xc] = do_helper_tr,
2462            [0xd] = do_helper_trt_fwd,
2463        };
2464        dx_helper helper = dx[opc & 0xf];
2465
2466        if (helper) {
2467            uint32_t l = extract64(insn, 48, 8);
2468            uint32_t b1 = extract64(insn, 44, 4);
2469            uint32_t d1 = extract64(insn, 32, 12);
2470            uint32_t b2 = extract64(insn, 28, 4);
2471            uint32_t d2 = extract64(insn, 16, 12);
2472            uint64_t a1 = wrap_address(env, env->regs[b1] + d1);
2473            uint64_t a2 = wrap_address(env, env->regs[b2] + d2);
2474
2475            env->cc_op = helper(env, l, a1, a2, 0);
2476            env->psw.addr += ilen;
2477            return;
2478        }
2479    } else if (opc == 0x0a) {
2480        env->int_svc_code = extract64(insn, 48, 8);
2481        env->int_svc_ilen = ilen;
2482        helper_exception(env, EXCP_SVC);
2483        g_assert_not_reached();
2484    }
2485
2486    /* Record the insn we want to execute as well as the ilen to use
2487       during the execution of the target insn.  This will also ensure
2488       that ex_value is non-zero, which flags that we are in a state
2489       that requires such execution.  */
2490    env->ex_value = insn | ilen;
2491}
2492
2493uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
2494                       uint64_t len)
2495{
2496    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
2497    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
2498    const uint64_t r0 = env->regs[0];
2499    const uintptr_t ra = GETPC();
2500    uint8_t dest_key, dest_as, dest_k, dest_a;
2501    uint8_t src_key, src_as, src_k, src_a;
2502    uint64_t val;
2503    int cc = 0;
2504
2505    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
2506               __func__, dest, src, len);
2507
2508    if (!(env->psw.mask & PSW_MASK_DAT)) {
2509        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2510    }
2511
2512    /* OAC (operand access control) for the first operand -> dest */
2513    val = (r0 & 0xffff0000ULL) >> 16;
2514    dest_key = (val >> 12) & 0xf;
2515    dest_as = (val >> 6) & 0x3;
2516    dest_k = (val >> 1) & 0x1;
2517    dest_a = val & 0x1;
2518
2519    /* OAC (operand access control) for the second operand -> src */
2520    val = (r0 & 0x0000ffffULL);
2521    src_key = (val >> 12) & 0xf;
2522    src_as = (val >> 6) & 0x3;
2523    src_k = (val >> 1) & 0x1;
2524    src_a = val & 0x1;
2525
2526    if (!dest_k) {
2527        dest_key = psw_key;
2528    }
2529    if (!src_k) {
2530        src_key = psw_key;
2531    }
2532    if (!dest_a) {
2533        dest_as = psw_as;
2534    }
2535    if (!src_a) {
2536        src_as = psw_as;
2537    }
2538
2539    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
2540        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2541    }
2542    if (!(env->cregs[0] & CR0_SECONDARY) &&
2543        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
2544        tcg_s390_program_interrupt(env, PGM_SPECIAL_OP, ra);
2545    }
2546    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
2547        tcg_s390_program_interrupt(env, PGM_PRIVILEGED, ra);
2548    }
2549
2550    len = wrap_length32(env, len);
2551    if (len > 4096) {
2552        cc = 3;
2553        len = 4096;
2554    }
2555
2556    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
2557    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
2558        (env->psw.mask & PSW_MASK_PSTATE)) {
2559        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
2560                      __func__);
2561        tcg_s390_program_interrupt(env, PGM_ADDRESSING, ra);
2562    }
2563
2564    /* FIXME: Access using correct keys and AR-mode */
2565    if (len) {
2566        S390Access srca = access_prepare(env, src, len, MMU_DATA_LOAD,
2567                                         mmu_idx_from_as(src_as), ra);
2568        S390Access desta = access_prepare(env, dest, len, MMU_DATA_STORE,
2569                                          mmu_idx_from_as(dest_as), ra);
2570
2571        access_memmove(env, &desta, &srca, ra);
2572    }
2573
2574    return cc;
2575}
2576
2577/* Decode a Unicode character.  A return value < 0 indicates success, storing
2578   the UTF-32 result into OCHAR and the input length into OLEN.  A return
2579   value >= 0 indicates failure, and the CC value to be returned.  */
2580typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2581                                 uint64_t ilen, bool enh_check, uintptr_t ra,
2582                                 uint32_t *ochar, uint32_t *olen);
2583
2584/* Encode a Unicode character.  A return value < 0 indicates success, storing
2585   the bytes into ADDR and the output length into OLEN.  A return value >= 0
2586   indicates failure, and the CC value to be returned.  */
2587typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
2588                                 uint64_t ilen, uintptr_t ra, uint32_t c,
2589                                 uint32_t *olen);
2590
2591static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2592                       bool enh_check, uintptr_t ra,
2593                       uint32_t *ochar, uint32_t *olen)
2594{
2595    uint8_t s0, s1, s2, s3;
2596    uint32_t c, l;
2597
2598    if (ilen < 1) {
2599        return 0;
2600    }
2601    s0 = cpu_ldub_data_ra(env, addr, ra);
2602    if (s0 <= 0x7f) {
2603        /* one byte character */
2604        l = 1;
2605        c = s0;
2606    } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2607        /* invalid character */
2608        return 2;
2609    } else if (s0 <= 0xdf) {
2610        /* two byte character */
2611        l = 2;
2612        if (ilen < 2) {
2613            return 0;
2614        }
2615        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2616        c = s0 & 0x1f;
2617        c = (c << 6) | (s1 & 0x3f);
2618        if (enh_check && (s1 & 0xc0) != 0x80) {
2619            return 2;
2620        }
2621    } else if (s0 <= 0xef) {
2622        /* three byte character */
2623        l = 3;
2624        if (ilen < 3) {
2625            return 0;
2626        }
2627        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2628        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2629        c = s0 & 0x0f;
2630        c = (c << 6) | (s1 & 0x3f);
2631        c = (c << 6) | (s2 & 0x3f);
2632        /* Fold the byte-by-byte range descriptions in the PoO into
2633           tests against the complete value.  It disallows encodings
2634           that could be smaller, and the UTF-16 surrogates.  */
2635        if (enh_check
2636            && ((s1 & 0xc0) != 0x80
2637                || (s2 & 0xc0) != 0x80
2638                || c < 0x1000
2639                || (c >= 0xd800 && c <= 0xdfff))) {
2640            return 2;
2641        }
2642    } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2643        /* four byte character */
2644        l = 4;
2645        if (ilen < 4) {
2646            return 0;
2647        }
2648        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2649        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2650        s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2651        c = s0 & 0x07;
2652        c = (c << 6) | (s1 & 0x3f);
2653        c = (c << 6) | (s2 & 0x3f);
2654        c = (c << 6) | (s3 & 0x3f);
2655        /* See above.  */
2656        if (enh_check
2657            && ((s1 & 0xc0) != 0x80
2658                || (s2 & 0xc0) != 0x80
2659                || (s3 & 0xc0) != 0x80
2660                || c < 0x010000
2661                || c > 0x10ffff)) {
2662            return 2;
2663        }
2664    } else {
2665        /* invalid character */
2666        return 2;
2667    }
2668
2669    *ochar = c;
2670    *olen = l;
2671    return -1;
2672}
2673
2674static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2675                        bool enh_check, uintptr_t ra,
2676                        uint32_t *ochar, uint32_t *olen)
2677{
2678    uint16_t s0, s1;
2679    uint32_t c, l;
2680
2681    if (ilen < 2) {
2682        return 0;
2683    }
2684    s0 = cpu_lduw_data_ra(env, addr, ra);
2685    if ((s0 & 0xfc00) != 0xd800) {
2686        /* one word character */
2687        l = 2;
2688        c = s0;
2689    } else {
2690        /* two word character */
2691        l = 4;
2692        if (ilen < 4) {
2693            return 0;
2694        }
2695        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2696        c = extract32(s0, 6, 4) + 1;
2697        c = (c << 6) | (s0 & 0x3f);
2698        c = (c << 10) | (s1 & 0x3ff);
2699        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2700            /* invalid surrogate character */
2701            return 2;
2702        }
2703    }
2704
2705    *ochar = c;
2706    *olen = l;
2707    return -1;
2708}
2709
2710static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2711                        bool enh_check, uintptr_t ra,
2712                        uint32_t *ochar, uint32_t *olen)
2713{
2714    uint32_t c;
2715
2716    if (ilen < 4) {
2717        return 0;
2718    }
2719    c = cpu_ldl_data_ra(env, addr, ra);
2720    if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2721        /* invalid unicode character */
2722        return 2;
2723    }
2724
2725    *ochar = c;
2726    *olen = 4;
2727    return -1;
2728}
2729
2730static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2731                       uintptr_t ra, uint32_t c, uint32_t *olen)
2732{
2733    uint8_t d[4];
2734    uint32_t l, i;
2735
2736    if (c <= 0x7f) {
2737        /* one byte character */
2738        l = 1;
2739        d[0] = c;
2740    } else if (c <= 0x7ff) {
2741        /* two byte character */
2742        l = 2;
2743        d[1] = 0x80 | extract32(c, 0, 6);
2744        d[0] = 0xc0 | extract32(c, 6, 5);
2745    } else if (c <= 0xffff) {
2746        /* three byte character */
2747        l = 3;
2748        d[2] = 0x80 | extract32(c, 0, 6);
2749        d[1] = 0x80 | extract32(c, 6, 6);
2750        d[0] = 0xe0 | extract32(c, 12, 4);
2751    } else {
2752        /* four byte character */
2753        l = 4;
2754        d[3] = 0x80 | extract32(c, 0, 6);
2755        d[2] = 0x80 | extract32(c, 6, 6);
2756        d[1] = 0x80 | extract32(c, 12, 6);
2757        d[0] = 0xf0 | extract32(c, 18, 3);
2758    }
2759
2760    if (ilen < l) {
2761        return 1;
2762    }
2763    for (i = 0; i < l; ++i) {
2764        cpu_stb_data_ra(env, addr + i, d[i], ra);
2765    }
2766
2767    *olen = l;
2768    return -1;
2769}
2770
2771static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2772                        uintptr_t ra, uint32_t c, uint32_t *olen)
2773{
2774    uint16_t d0, d1;
2775
2776    if (c <= 0xffff) {
2777        /* one word character */
2778        if (ilen < 2) {
2779            return 1;
2780        }
2781        cpu_stw_data_ra(env, addr, c, ra);
2782        *olen = 2;
2783    } else {
2784        /* two word character */
2785        if (ilen < 4) {
2786            return 1;
2787        }
2788        d1 = 0xdc00 | extract32(c, 0, 10);
2789        d0 = 0xd800 | extract32(c, 10, 6);
2790        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2791        cpu_stw_data_ra(env, addr + 0, d0, ra);
2792        cpu_stw_data_ra(env, addr + 2, d1, ra);
2793        *olen = 4;
2794    }
2795
2796    return -1;
2797}
2798
2799static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2800                        uintptr_t ra, uint32_t c, uint32_t *olen)
2801{
2802    if (ilen < 4) {
2803        return 1;
2804    }
2805    cpu_stl_data_ra(env, addr, c, ra);
2806    *olen = 4;
2807    return -1;
2808}
2809
2810static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
2811                                       uint32_t r2, uint32_t m3, uintptr_t ra,
2812                                       decode_unicode_fn decode,
2813                                       encode_unicode_fn encode)
2814{
2815    uint64_t dst = get_address(env, r1);
2816    uint64_t dlen = get_length(env, r1 + 1);
2817    uint64_t src = get_address(env, r2);
2818    uint64_t slen = get_length(env, r2 + 1);
2819    bool enh_check = m3 & 1;
2820    int cc, i;
2821
2822    /* Lest we fail to service interrupts in a timely manner, limit the
2823       amount of work we're willing to do.  For now, let's cap at 256.  */
2824    for (i = 0; i < 256; ++i) {
2825        uint32_t c, ilen, olen;
2826
2827        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
2828        if (unlikely(cc >= 0)) {
2829            break;
2830        }
2831        cc = encode(env, dst, dlen, ra, c, &olen);
2832        if (unlikely(cc >= 0)) {
2833            break;
2834        }
2835
2836        src += ilen;
2837        slen -= ilen;
2838        dst += olen;
2839        dlen -= olen;
2840        cc = 3;
2841    }
2842
2843    set_address(env, r1, dst);
2844    set_length(env, r1 + 1, dlen);
2845    set_address(env, r2, src);
2846    set_length(env, r2 + 1, slen);
2847
2848    return cc;
2849}
2850
2851uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2852{
2853    return convert_unicode(env, r1, r2, m3, GETPC(),
2854                           decode_utf8, encode_utf16);
2855}
2856
2857uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2858{
2859    return convert_unicode(env, r1, r2, m3, GETPC(),
2860                           decode_utf8, encode_utf32);
2861}
2862
2863uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2864{
2865    return convert_unicode(env, r1, r2, m3, GETPC(),
2866                           decode_utf16, encode_utf8);
2867}
2868
2869uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2870{
2871    return convert_unicode(env, r1, r2, m3, GETPC(),
2872                           decode_utf16, encode_utf32);
2873}
2874
2875uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2876{
2877    return convert_unicode(env, r1, r2, m3, GETPC(),
2878                           decode_utf32, encode_utf8);
2879}
2880
2881uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
2882{
2883    return convert_unicode(env, r1, r2, m3, GETPC(),
2884                           decode_utf32, encode_utf16);
2885}
2886
2887void probe_write_access(CPUS390XState *env, uint64_t addr, uint64_t len,
2888                        uintptr_t ra)
2889{
2890    /* test the actual access, not just any access to the page due to LAP */
2891    while (len) {
2892        const uint64_t pagelen = -(addr | TARGET_PAGE_MASK);
2893        const uint64_t curlen = MIN(pagelen, len);
2894
2895        probe_write(env, addr, curlen, cpu_mmu_index(env, false), ra);
2896        addr = wrap_address(env, addr + curlen);
2897        len -= curlen;
2898    }
2899}
2900
2901void HELPER(probe_write_access)(CPUS390XState *env, uint64_t addr, uint64_t len)
2902{
2903    probe_write_access(env, addr, len, GETPC());
2904}
2905