qemu/target/s390x/mem_helper.c
<<
>>
Prefs
   1/*
   2 *  S/390 memory access helper routines
   3 *
   4 *  Copyright (c) 2009 Ulrich Hecht
   5 *  Copyright (c) 2009 Alexander Graf
   6 *
   7 * This library is free software; you can redistribute it and/or
   8 * modify it under the terms of the GNU Lesser General Public
   9 * License as published by the Free Software Foundation; either
  10 * version 2 of the License, or (at your option) any later version.
  11 *
  12 * This library is distributed in the hope that it will be useful,
  13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 * Lesser General Public License for more details.
  16 *
  17 * You should have received a copy of the GNU Lesser General Public
  18 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  19 */
  20
  21#include "qemu/osdep.h"
  22#include "cpu.h"
  23#include "internal.h"
  24#include "exec/address-spaces.h"
  25#include "exec/helper-proto.h"
  26#include "exec/exec-all.h"
  27#include "exec/cpu_ldst.h"
  28#include "qemu/int128.h"
  29
  30#if !defined(CONFIG_USER_ONLY)
  31#include "hw/s390x/storage-keys.h"
  32#endif
  33
  34/*****************************************************************************/
  35/* Softmmu support */
  36#if !defined(CONFIG_USER_ONLY)
  37
/* try to fill the TLB and return an exception if error. If retaddr is
   NULL, it means that the function was called in C code (i.e. not
   from generated code or from helper.c) */
/* XXX: fix it to restore all registers */
void tlb_fill(CPUState *cs, target_ulong addr, MMUAccessType access_type,
              int mmu_idx, uintptr_t retaddr)
{
    /* Attempt the translation; a non-zero result means the access faulted
       and a guest exception has been queued by the MMU code.  */
    int ret = s390_cpu_handle_mmu_fault(cs, addr, access_type, mmu_idx);
    if (unlikely(ret != 0)) {
        /* Unwind to RETADDR so the guest state reflects the faulting
           instruction, then longjmp back to the CPU loop (no return).  */
        cpu_loop_exit_restore(cs, retaddr);
    }
}
  50
  51#endif
  52
  53/* #define DEBUG_HELPER */
  54#ifdef DEBUG_HELPER
  55#define HELPER_LOG(x...) qemu_log(x)
  56#else
  57#define HELPER_LOG(x...)
  58#endif
  59
  60static inline bool psw_key_valid(CPUS390XState *env, uint8_t psw_key)
  61{
  62    uint16_t pkm = env->cregs[3] >> 16;
  63
  64    if (env->psw.mask & PSW_MASK_PSTATE) {
  65        /* PSW key has range 0..15, it is valid if the bit is 1 in the PKM */
  66        return pkm & (0x80 >> psw_key);
  67    }
  68    return true;
  69}
  70
  71/* Reduce the length so that addr + len doesn't cross a page boundary.  */
  72static inline uint32_t adj_len_to_page(uint32_t len, uint64_t addr)
  73{
  74#ifndef CONFIG_USER_ONLY
  75    if ((addr & ~TARGET_PAGE_MASK) + len - 1 >= TARGET_PAGE_SIZE) {
  76        return -(addr | TARGET_PAGE_MASK);
  77    }
  78#endif
  79    return len;
  80}
  81
  82/* Trigger a SPECIFICATION exception if an address or a length is not
  83   naturally aligned.  */
  84static inline void check_alignment(CPUS390XState *env, uint64_t v,
  85                                   int wordsize, uintptr_t ra)
  86{
  87    if (v % wordsize) {
  88        CPUState *cs = CPU(s390_env_get_cpu(env));
  89        cpu_restore_state(cs, ra);
  90        program_interrupt(env, PGM_SPECIFICATION, 6);
  91    }
  92}
  93
  94/* Load a value from memory according to its size.  */
  95static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
  96                                           int wordsize, uintptr_t ra)
  97{
  98    switch (wordsize) {
  99    case 1:
 100        return cpu_ldub_data_ra(env, addr, ra);
 101    case 2:
 102        return cpu_lduw_data_ra(env, addr, ra);
 103    default:
 104        abort();
 105    }
 106}
 107
 108/* Store a to memory according to its size.  */
 109static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
 110                                      uint64_t value, int wordsize,
 111                                      uintptr_t ra)
 112{
 113    switch (wordsize) {
 114    case 1:
 115        cpu_stb_data_ra(env, addr, value, ra);
 116        break;
 117    case 2:
 118        cpu_stw_data_ra(env, addr, value, ra);
 119        break;
 120    default:
 121        abort();
 122    }
 123}
 124
 125static void fast_memset(CPUS390XState *env, uint64_t dest, uint8_t byte,
 126                        uint32_t l, uintptr_t ra)
 127{
 128    int mmu_idx = cpu_mmu_index(env, false);
 129
 130    while (l > 0) {
 131        void *p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, mmu_idx);
 132        if (p) {
 133            /* Access to the whole page in write mode granted.  */
 134            uint32_t l_adj = adj_len_to_page(l, dest);
 135            memset(p, byte, l_adj);
 136            dest += l_adj;
 137            l -= l_adj;
 138        } else {
 139            /* We failed to get access to the whole page. The next write
 140               access will likely fill the QEMU TLB for the next iteration.  */
 141            cpu_stb_data_ra(env, dest, byte, ra);
 142            dest++;
 143            l--;
 144        }
 145    }
 146}
 147
 148#ifndef CONFIG_USER_ONLY
/* Copy LEN bytes from SRC to DEST where source and destination may live
   in different address spaces (MMU indexes).  Addresses are re-wrapped
   to the current addressing mode on every iteration, so the copy can
   cross the address wrap-around point.  */
static void fast_memmove_idx(CPUS390XState *env, uint64_t dest, uint64_t src,
                             uint32_t len, int dest_idx, int src_idx,
                             uintptr_t ra)
{
    /* Pre-built memop/mmu-index pairs for the byte-at-a-time fallback.  */
    TCGMemOpIdx oi_dest = make_memop_idx(MO_UB, dest_idx);
    TCGMemOpIdx oi_src = make_memop_idx(MO_UB, src_idx);
    uint32_t len_adj;
    void *src_p;
    void *dest_p;
    uint8_t x;

    while (len > 0) {
        src = wrap_address(env, src);
        dest = wrap_address(env, dest);
        src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, src_idx);
        dest_p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, dest_idx);

        if (src_p && dest_p) {
            /* Access to both whole pages granted.  */
            len_adj = adj_len_to_page(adj_len_to_page(len, src), dest);
            memmove(dest_p, src_p, len_adj);
        } else {
            /* We failed to get access to one or both whole pages. The next
               read or write access will likely fill the QEMU TLB for the
               next iteration.  */
            len_adj = 1;
            x = helper_ret_ldub_mmu(env, src, oi_src, ra);
            helper_ret_stb_mmu(env, dest, x, oi_dest, ra);
        }
        src += len_adj;
        dest += len_adj;
        len -= len_adj;
    }
}
 183
 184static int mmu_idx_from_as(uint8_t as)
 185{
 186    switch (as) {
 187    case AS_PRIMARY:
 188        return MMU_PRIMARY_IDX;
 189    case AS_SECONDARY:
 190        return MMU_SECONDARY_IDX;
 191    case AS_HOME:
 192        return MMU_HOME_IDX;
 193    default:
 194        /* FIXME AS_ACCREG */
 195        g_assert_not_reached();
 196    }
 197}
 198
 199static void fast_memmove_as(CPUS390XState *env, uint64_t dest, uint64_t src,
 200                            uint32_t len, uint8_t dest_as, uint8_t src_as,
 201                            uintptr_t ra)
 202{
 203    int src_idx = mmu_idx_from_as(src_as);
 204    int dest_idx = mmu_idx_from_as(dest_as);
 205
 206    fast_memmove_idx(env, dest, src, len, dest_idx, src_idx, ra);
 207}
 208#endif
 209
 210static void fast_memmove(CPUS390XState *env, uint64_t dest, uint64_t src,
 211                         uint32_t l, uintptr_t ra)
 212{
 213    int mmu_idx = cpu_mmu_index(env, false);
 214
 215    while (l > 0) {
 216        void *src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, mmu_idx);
 217        void *dest_p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, mmu_idx);
 218        if (src_p && dest_p) {
 219            /* Access to both whole pages granted.  */
 220            uint32_t l_adj = adj_len_to_page(l, src);
 221            l_adj = adj_len_to_page(l_adj, dest);
 222            memmove(dest_p, src_p, l_adj);
 223            src += l_adj;
 224            dest += l_adj;
 225            l -= l_adj;
 226        } else {
 227            /* We failed to get access to one or both whole pages. The next
 228               read or write access will likely fill the QEMU TLB for the
 229               next iteration.  */
 230            cpu_stb_data_ra(env, dest, cpu_ldub_data_ra(env, src, ra), ra);
 231            src++;
 232            dest++;
 233            l--;
 234        }
 235    }
 236}
 237
 238/* and on array */
 239static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
 240                             uint64_t src, uintptr_t ra)
 241{
 242    uint32_t i;
 243    uint8_t c = 0;
 244
 245    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
 246               __func__, l, dest, src);
 247
 248    for (i = 0; i <= l; i++) {
 249        uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
 250        x &= cpu_ldub_data_ra(env, dest + i, ra);
 251        c |= x;
 252        cpu_stb_data_ra(env, dest + i, x, ra);
 253    }
 254    return c != 0;
 255}
 256
/* NC: AND of two storage fields.  GETPC() must be evaluated here, in the
   outermost helper, so faults unwind to the guest instruction.  */
uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}
 262
 263/* xor on array */
 264static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
 265                             uint64_t src, uintptr_t ra)
 266{
 267    uint32_t i;
 268    uint8_t c = 0;
 269
 270    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
 271               __func__, l, dest, src);
 272
 273    /* xor with itself is the same as memset(0) */
 274    if (src == dest) {
 275        fast_memset(env, dest, 0, l + 1, ra);
 276        return 0;
 277    }
 278
 279    for (i = 0; i <= l; i++) {
 280        uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
 281        x ^= cpu_ldub_data_ra(env, dest + i, ra);
 282        c |= x;
 283        cpu_stb_data_ra(env, dest + i, x, ra);
 284    }
 285    return c != 0;
 286}
 287
/* XC: XOR of two storage fields.  GETPC() must be evaluated here, in the
   outermost helper, so faults unwind to the guest instruction.  */
uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}
 293
 294/* or on array */
 295static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
 296                             uint64_t src, uintptr_t ra)
 297{
 298    uint32_t i;
 299    uint8_t c = 0;
 300
 301    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
 302               __func__, l, dest, src);
 303
 304    for (i = 0; i <= l; i++) {
 305        uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
 306        x |= cpu_ldub_data_ra(env, dest + i, ra);
 307        c |= x;
 308        cpu_stb_data_ra(env, dest + i, x, ra);
 309    }
 310    return c != 0;
 311}
 312
/* OC: OR of two storage fields.  GETPC() must be evaluated here, in the
   outermost helper, so faults unwind to the guest instruction.  */
uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}
 318
/* Move (l + 1) bytes from SRC to DEST with MVC overlap semantics:
   bytes are conceptually copied one at a time left to right, so a
   source that overlaps the destination from the left propagates.  */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* mvc and memmove do not behave the same when areas overlap! */
    /* mvc with source pointing to the byte after the destination is the
       same as memset with the first source byte */
    if (dest == src + 1) {
        fast_memset(env, dest, cpu_ldub_data_ra(env, src, ra), l + 1, ra);
    } else if (dest < src || src + l < dest) {
        /* Non-destructive overlap (or none): a plain block move matches
           the byte-by-byte semantics.  */
        fast_memmove(env, dest, src, l + 1, ra);
    } else {
        /* slow version with byte accesses which always work */
        for (i = 0; i <= l; i++) {
            uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
            cpu_stb_data_ra(env, dest + i, x, ra);
        }
    }

    /* cc is unchanged by MVC; return the current value for callers that
       want it (e.g. EX).  */
    return env->cc_op;
}
 345
/* MVC: move (l + 1) bytes; overlap rules handled by do_helper_mvc.
   GETPC() must be evaluated in this outermost helper.  */
void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}
 350
 351/* move inverse  */
 352void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 353{
 354    uintptr_t ra = GETPC();
 355    int i;
 356
 357    for (i = 0; i <= l; i++) {
 358        uint8_t v = cpu_ldub_data_ra(env, src - i, ra);
 359        cpu_stb_data_ra(env, dest + i, v, ra);
 360    }
 361}
 362
 363/* move numerics  */
 364void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 365{
 366    uintptr_t ra = GETPC();
 367    int i;
 368
 369    for (i = 0; i <= l; i++) {
 370        uint8_t v = cpu_ldub_data_ra(env, dest + i, ra) & 0xf0;
 371        v |= cpu_ldub_data_ra(env, src + i, ra) & 0x0f;
 372        cpu_stb_data_ra(env, dest + i, v, ra);
 373    }
 374}
 375
/* move with offset: shift the source digits left by one nibble into the
   destination, preserving the destination's rightmost (sign) nibble.
   Operand lengths are encoded in L: high nibble = dest length - 1,
   low nibble = src length - 1.  Processing runs right to left.  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = l >> 4;
    int len_src = l & 0xf;
    uint8_t byte_dest, byte_src;
    int i;

    /* Point at the rightmost byte of each operand.  */
    src += len_src;
    dest += len_dest;

    /* Handle rightmost byte */
    byte_src = cpu_ldub_data_ra(env, src, ra);
    byte_dest = cpu_ldub_data_ra(env, dest, ra);
    /* Keep the destination's low (sign) nibble; shift the source in.  */
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    cpu_stb_data_ra(env, dest, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = 1; i <= len_dest; i++) {
        /* The high nibble of the previous source byte becomes the low
           nibble of this destination byte.  */
        byte_dest = byte_src >> 4;
        if (len_src - i >= 0) {
            byte_src = cpu_ldub_data_ra(env, src - i, ra);
        } else {
            /* Source exhausted: pad with zero digits.  */
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        cpu_stb_data_ra(env, dest - i, byte_dest, ra);
    }
}
 406
 407/* move zones  */
 408void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 409{
 410    uintptr_t ra = GETPC();
 411    int i;
 412
 413    for (i = 0; i <= l; i++) {
 414        uint8_t b = cpu_ldub_data_ra(env, dest + i, ra) & 0x0f;
 415        b |= cpu_ldub_data_ra(env, src + i, ra) & 0xf0;
 416        cpu_stb_data_ra(env, dest + i, b, ra);
 417    }
 418}
 419
 420/* compare unsigned byte arrays */
 421static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
 422                              uint64_t s2, uintptr_t ra)
 423{
 424    uint32_t i;
 425    uint32_t cc = 0;
 426
 427    HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
 428               __func__, l, s1, s2);
 429
 430    for (i = 0; i <= l; i++) {
 431        uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
 432        uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
 433        HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
 434        if (x < y) {
 435            cc = 1;
 436            break;
 437        } else if (x > y) {
 438            cc = 2;
 439            break;
 440        }
 441    }
 442
 443    HELPER_LOG("\n");
 444    return cc;
 445}
 446
/* CLC: compare (l + 1) bytes; returns the condition code.  GETPC() must
   be evaluated in this outermost helper.  */
uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}
 451
/* compare logical under mask: compare the bytes of R1 selected by the
   4-bit MASK (one bit per byte, MSB first) against successive bytes in
   storage at ADDR.  cc: 0 all equal (or mask empty), 1 register low,
   2 register high.  */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    while (mask) {
        if (mask & 8) {
            /* Bit 3 of the rolling mask selects the current top byte.  */
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            /* Storage advances only for selected bytes.  */
            addr++;
        }
        /* Rotate to the next mask bit and the next register byte.  */
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}
 484
/* Read general register REG as an address, wrapped to the width of the
   current addressing mode (24/31/64-bit).  */
static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}
 489
/* Write ADDRESS back into general register REG, updating only the bits
   the current addressing mode defines (see the per-mode notes below).  */
static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}
 512
 513static inline uint64_t wrap_length(CPUS390XState *env, uint64_t length)
 514{
 515    if (!(env->psw.mask & PSW_MASK_64)) {
 516        /* 24-Bit and 31-Bit mode */
 517        length &= 0x7fffffff;
 518    }
 519    return length;
 520}
 521
/* Read general register REG as a length, clamped per addressing mode.  */
static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length(env, env->regs[reg]);
}
 526
 527static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
 528{
 529    if (env->psw.mask & PSW_MASK_64) {
 530        /* 64-Bit mode */
 531        env->regs[reg] = length;
 532    } else {
 533        /* 24-Bit and 31-Bit mode */
 534        env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
 535    }
 536}
 537
/* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    /* The search byte is the low 8 bits of R0.  */
    uint8_t v, c = env->regs[0];

    /* Bits 32-55 must contain all 0.  */
    if (env->regs[0] & 0xffffff00u) {
        cpu_restore_state(ENV_GET_CPU(env), ra);
        program_interrupt(env, PGM_SPECIFICATION, 6);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
 576
/* search string unicode: like SRST but scans 16-bit characters; the
   search character is the low 16 bits of R0.  */
void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0];
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        cpu_restore_state(ENV_GET_CPU(env), ra);
        program_interrupt(env, PGM_SPECIFICATION, 6);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
 617
/* unsigned string compare (c is string terminator) */
uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    /* Only the low byte of C is the terminator.  */
    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                /* Second result (new s2) is returned through env->retxl.  */
                env->retxl = s2;
                return s1;
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            env->retxl = s2 + len;
            return s1 + len;
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    env->retxl = s2 + len;
    return s1 + len;
}
 655
/* move page */
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2)
{
    /* ??? missing r0 handling, which includes access keys, but more
       importantly optional suppression of the exception!  */
    /* NOTE(review): r1/r2 are used as-is; presumably the operands are
       page-aligned by this point -- confirm alignment is enforced by
       the caller/translator.  */
    fast_memmove(env, r1, r2, TARGET_PAGE_SIZE, GETPC());
    return 0; /* data moved */
}
 664
 665/* string copy (c is string terminator) */
 666uint64_t HELPER(mvst)(CPUS390XState *env, uint64_t c, uint64_t d, uint64_t s)
 667{
 668    uintptr_t ra = GETPC();
 669    uint32_t len;
 670
 671    c = c & 0xff;
 672    d = wrap_address(env, d);
 673    s = wrap_address(env, s);
 674
 675    /* Lest we fail to service interrupts in a timely manner, limit the
 676       amount of work we're willing to do.  For now, let's cap at 8k.  */
 677    for (len = 0; len < 0x2000; ++len) {
 678        uint8_t v = cpu_ldub_data_ra(env, s + len, ra);
 679        cpu_stb_data_ra(env, d + len, v, ra);
 680        if (v == c) {
 681            /* Complete.  Set CC=1 and advance R1.  */
 682            env->cc_op = 1;
 683            env->retxl = s;
 684            return d + len;
 685        }
 686    }
 687
 688    /* Incomplete.  Set CC=3 and signal to advance R1 and R2.  */
 689    env->cc_op = 3;
 690    env->retxl = s + len;
 691    return d + len;
 692}
 693
 694/* load access registers r1 to r3 from memory at a2 */
 695void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
 696{
 697    uintptr_t ra = GETPC();
 698    int i;
 699
 700    for (i = r1;; i = (i + 1) % 16) {
 701        env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
 702        a2 += 4;
 703
 704        if (i == r3) {
 705            break;
 706        }
 707    }
 708}
 709
 710/* store access registers r1 to r3 in memory at a2 */
 711void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
 712{
 713    uintptr_t ra = GETPC();
 714    int i;
 715
 716    for (i = r1;; i = (i + 1) % 16) {
 717        cpu_stl_data_ra(env, a2, env->aregs[i], ra);
 718        a2 += 4;
 719
 720        if (i == r3) {
 721            break;
 722        }
 723    }
 724}
 725
/* move long helper: copy MIN(*srclen, *destlen) bytes from *src to
   *dest, then pad the rest of the destination with PAD (bytes for
   wordsize 1, halfwords for wordsize 2).  All four pointers/lengths
   are advanced in place; the cc reflects the original length relation
   (0 equal, 1 dest shorter, 2 dest longer).  */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    uint64_t len = MIN(*srclen, *destlen);
    uint32_t cc;

    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* Copy the src array */
    fast_memmove(env, *dest, *src, len, ra);
    *src += len;
    *srclen -= len;
    *dest += len;
    *destlen -= len;

    /* Pad the remaining area */
    if (wordsize == 1) {
        fast_memset(env, *dest, pad, *destlen, ra);
        *dest += *destlen;
        *destlen = 0;
    } else {
        /* If remaining length is odd, pad with odd byte first.  */
        if (*destlen & 1) {
            cpu_stb_data_ra(env, *dest, pad & 0xff, ra);
            *dest += 1;
            *destlen -= 1;
        }
        /* The remaining length is even, pad using words.  */
        for (; *destlen; *dest += 2, *destlen -= 2) {
            cpu_stw_data_ra(env, *dest, pad, ra);
        }
    }

    return cc;
}
 770
/* move long: lengths live in the low 24 bits of R1+1/R2+1; the pad byte
   is bits 24-31 of R2+1.  Registers are written back after the move.  */
uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
    uint64_t dest = get_address(env, r1);
    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
    uint64_t src = get_address(env, r2);
    uint8_t pad = env->regs[r2 + 1] >> 24;
    uint32_t cc;

    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);

    /* Only the 24-bit length fields are updated; the pad byte and the
       upper bits of the length registers are preserved.  */
    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
    set_address(env, r1, dest);
    set_address(env, r2, src);

    return cc;
}
 791
 792/* move long extended */
 793uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
 794                       uint32_t r3)
 795{
 796    uintptr_t ra = GETPC();
 797    uint64_t destlen = get_length(env, r1 + 1);
 798    uint64_t dest = get_address(env, r1);
 799    uint64_t srclen = get_length(env, r3 + 1);
 800    uint64_t src = get_address(env, r3);
 801    uint8_t pad = a2;
 802    uint32_t cc;
 803
 804    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
 805
 806    set_length(env, r1 + 1, destlen);
 807    set_length(env, r3 + 1, srclen);
 808    set_address(env, r1, dest);
 809    set_address(env, r3, src);
 810
 811    return cc;
 812}
 813
 814/* move long unicode */
 815uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
 816                       uint32_t r3)
 817{
 818    uintptr_t ra = GETPC();
 819    uint64_t destlen = get_length(env, r1 + 1);
 820    uint64_t dest = get_address(env, r1);
 821    uint64_t srclen = get_length(env, r3 + 1);
 822    uint64_t src = get_address(env, r3);
 823    uint16_t pad = a2;
 824    uint32_t cc;
 825
 826    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
 827
 828    set_length(env, r1 + 1, destlen);
 829    set_length(env, r3 + 1, srclen);
 830    set_address(env, r1, dest);
 831    set_address(env, r3, src);
 832
 833    return cc;
 834}
 835
 836/* compare logical long helper */
 837static inline uint32_t do_clcl(CPUS390XState *env,
 838                               uint64_t *src1, uint64_t *src1len,
 839                               uint64_t *src3, uint64_t *src3len,
 840                               uint16_t pad, uint64_t limit,
 841                               int wordsize, uintptr_t ra)
 842{
 843    uint64_t len = MAX(*src1len, *src3len);
 844    uint32_t cc = 0;
 845
 846    check_alignment(env, *src1len | *src3len, wordsize, ra);
 847
 848    if (!len) {
 849        return cc;
 850    }
 851
 852    /* Lest we fail to service interrupts in a timely manner, limit the
 853       amount of work we're willing to do.  */
 854    if (len > limit) {
 855        len = limit;
 856        cc = 3;
 857    }
 858
 859    for (; len; len -= wordsize) {
 860        uint16_t v1 = pad;
 861        uint16_t v3 = pad;
 862
 863        if (*src1len) {
 864            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
 865        }
 866        if (*src3len) {
 867            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
 868        }
 869
 870        if (v1 != v3) {
 871            cc = (v1 < v3) ? 1 : 2;
 872            break;
 873        }
 874
 875        if (*src1len) {
 876            *src1 += wordsize;
 877            *src1len -= wordsize;
 878        }
 879        if (*src3len) {
 880            *src3 += wordsize;
 881            *src3len -= wordsize;
 882        }
 883    }
 884
 885    return cc;
 886}
 887
 888
 889/* compare logical long */
 890uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
 891{
 892    uintptr_t ra = GETPC();
 893    uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
 894    uint64_t src1 = get_address(env, r1);
 895    uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
 896    uint64_t src3 = get_address(env, r2);
 897    uint8_t pad = env->regs[r2 + 1] >> 24;
 898    uint32_t cc;
 899
 900    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
 901
 902    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
 903    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
 904    set_address(env, r1, src1);
 905    set_address(env, r2, src3);
 906
 907    return cc;
 908}
 909
 910/* compare logical long extended memcompare insn with padding */
 911uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
 912                       uint32_t r3)
 913{
 914    uintptr_t ra = GETPC();
 915    uint64_t src1len = get_length(env, r1 + 1);
 916    uint64_t src1 = get_address(env, r1);
 917    uint64_t src3len = get_length(env, r3 + 1);
 918    uint64_t src3 = get_address(env, r3);
 919    uint8_t pad = a2;
 920    uint32_t cc;
 921
 922    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
 923
 924    set_length(env, r1 + 1, src1len);
 925    set_length(env, r3 + 1, src3len);
 926    set_address(env, r1, src1);
 927    set_address(env, r3, src3);
 928
 929    return cc;
 930}
 931
 932/* compare logical long unicode memcompare insn with padding */
 933uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
 934                       uint32_t r3)
 935{
 936    uintptr_t ra = GETPC();
 937    uint64_t src1len = get_length(env, r1 + 1);
 938    uint64_t src1 = get_address(env, r1);
 939    uint64_t src3len = get_length(env, r3 + 1);
 940    uint64_t src3 = get_address(env, r3);
 941    uint16_t pad = a2;
 942    uint32_t cc = 0;
 943
 944    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
 945
 946    set_length(env, r1 + 1, src1len);
 947    set_length(env, r3 + 1, src3len);
 948    set_address(env, r1, src1);
 949    set_address(env, r3, src3);
 950
 951    return cc;
 952}
 953
/* checksum: accumulate up to 8k bytes at SRC into the 32-bit running
   checksum in the low half of R1, folding carries back in.  Returns the
   number of bytes processed; the checksum itself is returned through
   env->retxl.  cc 0 means done, 3 means call again.  */
uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                      uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    /* Handle the 0-3 trailing bytes, left-justified within a word.  */
    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    env->retxl = cksm;
    return len;
}
1000
/* PACK: convert a zoned-decimal source into packed-decimal at dest.
   'len' encodes both length codes: destination in the high nibble,
   source in the low nibble.  Operands are processed right to left.  */
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    /* Point at the rightmost byte of each operand.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest >= 0) {
        b = 0;

        /* Low nibble: digit of the next source byte, when available.  */
        if (len_src > 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        /* High nibble: digit of the byte after that; positions past
           the end of the source are left zero (high-order padding).  */
        if (len_src > 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
1037
/* Common body of PKA/PKU: pack an ASCII (ssize 1) or Unicode (ssize 2)
   digit string into a 16-byte signed-packed-decimal destination.  The
   sign is forced positive (0xc); missing high-order digits are zero.  */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    /* NOTE(review): both conditions below test 'srclen > ssize', so a
       character is only consumed while more than one remains; for a
       non-maximal srclen the leftmost character appears never to be
       packed ('>= ssize' expected) -- verify against the PoP and the
       value translate.c passes for srclen.  */
    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            /* Low nibble: digit of the next character to the left.  */
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        /* High nibble: digit of the character after that.  */
        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
1071
1072
/* PACK ASCII: single-byte source characters.  GETPC() must be taken
   here in the outermost helper so faults unwind correctly.  */
void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 1, GETPC());
}
1078
/* PACK UNICODE: two-byte source characters.  GETPC() must be taken
   here in the outermost helper so faults unwind correctly.  */
void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 2, GETPC());
}
1084
/* UNPACK: convert packed decimal at src into zoned decimal at dest.
   'len' encodes both length codes: destination in the high nibble,
   source in the low nibble.  Operands are processed right to left.  */
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    /* Tracks which nibble of the current source byte is consumed next. */
    int second_nibble = 0;

    /* Point at the rightmost byte of each operand.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        /* Exhausted sources yield zero digits (high-order padding).  */
        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
1131
/* Common body of UNPKA/UNPKU: unpack a 16-byte signed-packed-decimal
   source into ASCII (dsize 1) or Unicode (dsize 2) digits at dest.
   Returns the condition code derived from the sign nibble.  */
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            /* Odd digit position: fetch the next packed byte and use
               its low nibble; the high nibble is used next round.  */
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            /* Even position: use the high nibble of the current byte
               (for i == 0 this is the digit next to the sign).  */
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}
1182
/* UNPACK ASCII: one-byte destination characters; returns the cc.  */
uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 1, src, GETPC());
}
1188
/* UNPACK UNICODE: two-byte destination characters; returns the cc.  */
uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 2, src, GETPC());
}
1194
1195uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1196{
1197    uintptr_t ra = GETPC();
1198    uint32_t cc = 0;
1199    int i;
1200
1201    for (i = 0; i < destlen; i++) {
1202        uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1203        /* digit */
1204        cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1205
1206        if (i == (destlen - 1)) {
1207            /* sign */
1208            cc |= (b & 0xf) < 0xa ? 1 : 0;
1209        } else {
1210            /* digit */
1211            cc |= (b & 0xf) > 0x9 ? 2 : 0;
1212        }
1213    }
1214
1215    return cc;
1216}
1217
1218static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
1219                             uint64_t trans, uintptr_t ra)
1220{
1221    uint32_t i;
1222
1223    for (i = 0; i <= len; i++) {
1224        uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
1225        uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
1226        cpu_stb_data_ra(env, array + i, new_byte, ra);
1227    }
1228
1229    return env->cc_op;
1230}
1231
/* TRANSLATE wrapper.  GETPC() must be taken here, in the outermost
   helper, so that faults unwind to the correct translation block.  */
void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
                uint64_t trans)
{
    do_helper_tr(env, len, array, trans, GETPC());
}
1237
/* TRANSLATE EXTENDED: translate bytes at 'array' through the table at
   'trans', stopping at the test byte from GR0.  Returns the updated
   array address; the remaining length is returned via retxl and the
   condition code via cc_op.  */
uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
                     uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    /* The terminating test byte lives in the low byte of GR0.  */
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    /* Outside 64-bit mode, wrap the address and truncate the length.  */
    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;  /* CPU-determined partial completion */
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        /* Stop without storing when the test byte is found.  */
        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    env->retxl = len - i;
    return array + i;
}
1277
1278static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1279                                     uint64_t array, uint64_t trans,
1280                                     int inc, uintptr_t ra)
1281{
1282    int i;
1283
1284    for (i = 0; i <= len; i++) {
1285        uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1286        uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1287
1288        if (sbyte != 0) {
1289            set_address(env, 1, array + i * inc);
1290            env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1291            return (i == len) ? 2 : 1;
1292        }
1293    }
1294
1295    return 0;
1296}
1297
/* TRANSLATE AND TEST (forward scan).  GETPC() must be taken here, in
   the outermost helper, for correct fault unwinding.  */
uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
                     uint64_t trans)
{
    return do_helper_trt(env, len, array, trans, 1, GETPC());
}
1303
/* TRANSLATE AND TEST REVERSE (backward scan).  GETPC() must be taken
   here, in the outermost helper, for correct fault unwinding.  */
uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
                      uint64_t trans)
{
    return do_helper_trt(env, len, array, trans, -1, GETPC());
}
1309
/* Translate one/two to one/two */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    /* Destination and source element widths in bytes, from 'sizes'.  */
    int dsize = (sizes & 1) ? 1 : 2;
    int ssize = (sizes & 2) ? 1 : 2;
    /* GR1 holds the translation table; r1 is an even-odd pair giving
       the destination address and remaining length.  */
    uint64_t tbl = get_address(env, 1);
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;
    int i;

    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    /* The length must be a multiple of the source element size.  */
    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do.   */
    for (i = 0; i < 0x2000; i++) {
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        /* cc 1: the test character was encountered; stop before it.  */
        if (dval == tst) {
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        /* cc 0: whole operand processed; cc stays 3 (partial) when the
           0x2000-iteration cap is hit first.  */
        if (len == 0) {
            cc = 0;
            break;
        }
    }

    /* Write the advanced addresses and remaining length back.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}
1363
/* COMPARE DOUBLE AND SWAP (128-bit): compare-and-swap the 16 bytes at
   addr against the r1 register pair, storing the r3 pair on a match.
   The r1 pair always receives the old memory value; cc_op gets 0 on
   match, 1 on mismatch.
   NOTE(review): GETPC() in a static non-inline function only yields
   the TB return address if this gets inlined into the HELPER(cdsg)*
   callers -- confirm, or pass ra in explicitly.  */
static void do_cdsg(CPUS390XState *env, uint64_t addr,
                    uint32_t r1, uint32_t r3, bool parallel)
{
    uintptr_t ra = GETPC();
    /* Register pairs hold big-endian halves: even = high, odd = low.  */
    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
    Int128 oldv;
    bool fail;

    if (parallel) {
#ifndef CONFIG_ATOMIC128
        /* No host 128-bit cmpxchg: restart in the serial context.  */
        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
#else
        int mem_idx = cpu_mmu_index(env, false);
        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
        oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
        fail = !int128_eq(oldv, cmpv);
#endif
    } else {
        uint64_t oldh, oldl;

        /* Serial context: enforce the 16-byte alignment trap by hand.  */
        check_alignment(env, addr, 16, ra);

        oldh = cpu_ldq_data_ra(env, addr + 0, ra);
        oldl = cpu_ldq_data_ra(env, addr + 8, ra);

        oldv = int128_make128(oldl, oldh);
        fail = !int128_eq(oldv, cmpv);
        if (fail) {
            /* On mismatch, write back the bytes just read so memory is
               unchanged while still performing the stores.  */
            newv = oldv;
        }

        cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
        cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
    }

    env->cc_op = fail;
    env->regs[r1] = int128_gethi(oldv);
    env->regs[r1 + 1] = int128_getlo(oldv);
}
1404
/* CDSG, serial (exclusive) context.  */
void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
                  uint32_t r1, uint32_t r3)
{
    do_cdsg(env, addr, r1, r3, false);
}
1410
/* CDSG, parallel (MTTCG) context: requires a host 128-bit cmpxchg.  */
void HELPER(cdsg_parallel)(CPUS390XState *env, uint64_t addr,
                           uint32_t r1, uint32_t r3)
{
    do_cdsg(env, addr, r1, r3, true);
}
1416
1417static uint32_t do_csst(CPUS390XState *env, uint32_t r3, uint64_t a1,
1418                        uint64_t a2, bool parallel)
1419{
1420#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128)
1421    uint32_t mem_idx = cpu_mmu_index(env, false);
1422#endif
1423    uintptr_t ra = GETPC();
1424    uint32_t fc = extract32(env->regs[0], 0, 8);
1425    uint32_t sc = extract32(env->regs[0], 8, 8);
1426    uint64_t pl = get_address(env, 1) & -16;
1427    uint64_t svh, svl;
1428    uint32_t cc;
1429
1430    /* Sanity check the function code and storage characteristic.  */
1431    if (fc > 1 || sc > 3) {
1432        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
1433            goto spec_exception;
1434        }
1435        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
1436            goto spec_exception;
1437        }
1438    }
1439
1440    /* Sanity check the alignments.  */
1441    if (extract32(a1, 0, 4 << fc) || extract32(a2, 0, 1 << sc)) {
1442        goto spec_exception;
1443    }
1444
1445    /* Sanity check writability of the store address.  */
1446#ifndef CONFIG_USER_ONLY
1447    probe_write(env, a2, mem_idx, ra);
1448#endif
1449
1450    /* Note that the compare-and-swap is atomic, and the store is atomic, but
1451       the complete operation is not.  Therefore we do not need to assert serial
1452       context in order to implement this.  That said, restart early if we can't
1453       support either operation that is supposed to be atomic.  */
1454    if (parallel) {
1455        int mask = 0;
1456#if !defined(CONFIG_ATOMIC64)
1457        mask = -8;
1458#elif !defined(CONFIG_ATOMIC128)
1459        mask = -16;
1460#endif
1461        if (((4 << fc) | (1 << sc)) & mask) {
1462            cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
1463        }
1464    }
1465
1466    /* All loads happen before all stores.  For simplicity, load the entire
1467       store value area from the parameter list.  */
1468    svh = cpu_ldq_data_ra(env, pl + 16, ra);
1469    svl = cpu_ldq_data_ra(env, pl + 24, ra);
1470
1471    switch (fc) {
1472    case 0:
1473        {
1474            uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
1475            uint32_t cv = env->regs[r3];
1476            uint32_t ov;
1477
1478            if (parallel) {
1479#ifdef CONFIG_USER_ONLY
1480                uint32_t *haddr = g2h(a1);
1481                ov = atomic_cmpxchg__nocheck(haddr, cv, nv);
1482#else
1483                TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
1484                ov = helper_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
1485#endif
1486            } else {
1487                ov = cpu_ldl_data_ra(env, a1, ra);
1488                cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1489            }
1490            cc = (ov != cv);
1491            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
1492        }
1493        break;
1494
1495    case 1:
1496        {
1497            uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
1498            uint64_t cv = env->regs[r3];
1499            uint64_t ov;
1500
1501            if (parallel) {
1502#ifdef CONFIG_ATOMIC64
1503# ifdef CONFIG_USER_ONLY
1504                uint64_t *haddr = g2h(a1);
1505                ov = atomic_cmpxchg__nocheck(haddr, cv, nv);
1506# else
1507                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
1508                ov = helper_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
1509# endif
1510#else
1511                /* Note that we asserted !parallel above.  */
1512                g_assert_not_reached();
1513#endif
1514            } else {
1515                ov = cpu_ldq_data_ra(env, a1, ra);
1516                cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
1517            }
1518            cc = (ov != cv);
1519            env->regs[r3] = ov;
1520        }
1521        break;
1522
1523    case 2:
1524        {
1525            uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
1526            uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
1527            Int128 nv = int128_make128(nvl, nvh);
1528            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
1529            Int128 ov;
1530
1531            if (parallel) {
1532#ifdef CONFIG_ATOMIC128
1533                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1534                ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
1535                cc = !int128_eq(ov, cv);
1536#else
1537                /* Note that we asserted !parallel above.  */
1538                g_assert_not_reached();
1539#endif
1540            } else {
1541                uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
1542                uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);
1543
1544                ov = int128_make128(ol, oh);
1545                cc = !int128_eq(ov, cv);
1546                if (cc) {
1547                    nv = ov;
1548                }
1549
1550                cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
1551                cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
1552            }
1553
1554            env->regs[r3 + 0] = int128_gethi(ov);
1555            env->regs[r3 + 1] = int128_getlo(ov);
1556        }
1557        break;
1558
1559    default:
1560        g_assert_not_reached();
1561    }
1562
1563    /* Store only if the comparison succeeded.  Note that above we use a pair
1564       of 64-bit big-endian loads, so for sc < 3 we must extract the value
1565       from the most-significant bits of svh.  */
1566    if (cc == 0) {
1567        switch (sc) {
1568        case 0:
1569            cpu_stb_data_ra(env, a2, svh >> 56, ra);
1570            break;
1571        case 1:
1572            cpu_stw_data_ra(env, a2, svh >> 48, ra);
1573            break;
1574        case 2:
1575            cpu_stl_data_ra(env, a2, svh >> 32, ra);
1576            break;
1577        case 3:
1578            cpu_stq_data_ra(env, a2, svh, ra);
1579            break;
1580        case 4:
1581            if (parallel) {
1582#ifdef CONFIG_ATOMIC128
1583                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
1584                Int128 sv = int128_make128(svl, svh);
1585                helper_atomic_sto_be_mmu(env, a2, sv, oi, ra);
1586#else
1587                /* Note that we asserted !parallel above.  */
1588                g_assert_not_reached();
1589#endif
1590            } else {
1591                cpu_stq_data_ra(env, a2 + 0, svh, ra);
1592                cpu_stq_data_ra(env, a2 + 8, svl, ra);
1593            }
1594            break;
1595        default:
1596            g_assert_not_reached();
1597        }
1598    }
1599
1600    return cc;
1601
1602 spec_exception:
1603    cpu_restore_state(ENV_GET_CPU(env), ra);
1604    program_interrupt(env, PGM_SPECIFICATION, 6);
1605    g_assert_not_reached();
1606}
1607
/* CSST, serial (exclusive) context.  */
uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
{
    return do_csst(env, r3, a1, a2, false);
}
1612
/* CSST, parallel (MTTCG) context: needs host atomic cmpxchg support.  */
uint32_t HELPER(csst_parallel)(CPUS390XState *env, uint32_t r3, uint64_t a1,
                               uint64_t a2)
{
    return do_csst(env, r3, a1, a2, true);
}
1618
1619#if !defined(CONFIG_USER_ONLY)
/* LOAD CONTROL (64-bit): load control registers r1 through r3 (wrapping
   from 15 to 0) from consecutive doublewords at a2.  */
void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    S390CPU *cpu = s390_env_get_cpu(env);
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    for (i = r1;; i = (i + 1) % 16) {
        uint64_t val = cpu_ldq_data_ra(env, src, ra);
        /* CR9-CR11 control PER; remember changes so the watchpoints
           can be recomputed afterwards.  */
        if (env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        env->cregs[i] = val;
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
                   i, src, val);
        src += sizeof(uint64_t);

        if (i == r3) {
            break;
        }
    }

    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(CPU(cpu));
    }

    /* Control registers can affect address translation.  */
    tlb_flush(CPU(cpu));
}
1649
/* LOAD CONTROL (32-bit): replace the low 32 bits of control registers
   r1 through r3 (wrapping from 15 to 0) from consecutive words at a2;
   the high halves are left untouched.  */
void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    S390CPU *cpu = s390_env_get_cpu(env);
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    for (i = r1;; i = (i + 1) % 16) {
        uint32_t val = cpu_ldl_data_ra(env, src, ra);
        /* CR9-CR11 control PER; track changes for watchpoint rebuild. */
        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
        src += sizeof(uint32_t);

        if (i == r3) {
            break;
        }
    }

    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(CPU(cpu));
    }

    /* Control registers can affect address translation.  */
    tlb_flush(CPU(cpu));
}
1678
1679void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1680{
1681    uintptr_t ra = GETPC();
1682    uint64_t dest = a2;
1683    uint32_t i;
1684
1685    for (i = r1;; i = (i + 1) % 16) {
1686        cpu_stq_data_ra(env, dest, env->cregs[i], ra);
1687        dest += sizeof(uint64_t);
1688
1689        if (i == r3) {
1690            break;
1691        }
1692    }
1693}
1694
1695void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
1696{
1697    uintptr_t ra = GETPC();
1698    uint64_t dest = a2;
1699    uint32_t i;
1700
1701    for (i = r1;; i = (i + 1) % 16) {
1702        cpu_stl_data_ra(env, dest, env->cregs[i], ra);
1703        dest += sizeof(uint32_t);
1704
1705        if (i == r3) {
1706            break;
1707        }
1708    }
1709}
1710
/* TEST BLOCK: clear the real page containing real_addr.  */
uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
{
    uintptr_t ra = GETPC();
    int i;

    /* Operate on the whole (wrapped) real page, doubleword at a time. */
    real_addr = wrap_address(env, real_addr) & TARGET_PAGE_MASK;

    for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
        cpu_stq_real_ra(env, real_addr + i, 0, ra);
    }

    /* Always cc 0 (block usable).  NOTE(review): a complete TEST BLOCK
       would verify usability before clearing; this never reports
       unusable storage.  */
    return 0;
}
1724
/* TEST PROTECTION stub: always reports cc 0 (fetch and store
   permitted) without consulting storage keys or protection state.  */
uint32_t HELPER(tprot)(uint64_t a1, uint64_t a2)
{
    /* XXX implement */
    return 0;
}
1730
/* insert storage key extended */
uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
{
    /* Storage-key device and class, cached after the first lookup.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;

    /* Out-of-RAM addresses yield key 0.  NOTE(review): '>' lets
       addr == ram_size (first byte past RAM) through -- off-by-one?  */
    if (addr > ram_size) {
        return 0;
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
    }

    /* A failed key read is also reported as key 0.  */
    if (skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key)) {
        return 0;
    }
    return key;
}
1753
/* set storage key extended */
void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
    /* Storage-key device and class, cached after the first lookup.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;

    /* Silently ignore out-of-RAM addresses.  NOTE(review): '>' lets
       addr == ram_size through, as in iske -- verify the bound.  */
    if (addr > ram_size) {
        return;
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
    }

    /* The new key is the low byte of r1; set_skeys errors are ignored. */
    key = (uint8_t) r1;
    skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
}
1774
/* reset reference bit extended */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    /* Storage-key device and class, cached after the first lookup.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;

    /* NOTE(review): unlike iske/sske, r2 is not passed through
       wrap_address here, and '>' admits r2 == ram_size -- verify.  */
    if (r2 > ram_size) {
        return 0;
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
    }

    if (skeyclass->get_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
        return 0;
    }

    /* Remember the old R and C bits, then clear the reference bit.  */
    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    if (skeyclass->set_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
        return 0;
    }

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    return re >> 1;
}
1813
1814uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
1815{
1816    uintptr_t ra = GETPC();
1817    int cc = 0, i;
1818
1819    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
1820               __func__, l, a1, a2);
1821
1822    if (l > 256) {
1823        /* max 256 */
1824        l = 256;
1825        cc = 3;
1826    }
1827
1828    /* XXX replace w/ memcpy */
1829    for (i = 0; i < l; i++) {
1830        uint8_t x = cpu_ldub_primary_ra(env, a2 + i, ra);
1831        cpu_stb_secondary_ra(env, a1 + i, x, ra);
1832    }
1833
1834    return cc;
1835}
1836
1837uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
1838{
1839    uintptr_t ra = GETPC();
1840    int cc = 0, i;
1841
1842    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
1843               __func__, l, a1, a2);
1844
1845    if (l > 256) {
1846        /* max 256 */
1847        l = 256;
1848        cc = 3;
1849    }
1850
1851    /* XXX replace w/ memcpy */
1852    for (i = 0; i < l; i++) {
1853        uint8_t x = cpu_ldub_secondary_ra(env, a2 + i, ra);
1854        cpu_stb_primary_ra(env, a1 + i, x, ra);
1855    }
1856
1857    return cc;
1858}
1859
/* INVALIDATE DAT TABLE ENTRY: mark a range of region/segment table
   entries invalid and flush the TLB.  r1 is the ASCE-style table
   designation, r2 selects the first index and the entry count, and
   m4 bit 0 requests a local-only flush.  */
void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = CPU(s390_env_get_cpu(env));
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    /* The reserved bits of r2 must be zero.  */
    if (r2 & 0xff000) {
        cpu_restore_state(cs, ra);
        program_interrupt(env, PGM_SPECIFICATION, 4);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & _ASCE_ORIGIN;
        /* Low 11 bits of r2 give the number of additional entries.  */
        entries = (r2 & 0x7ff) + 1;

        /* Pick the index field of r2 matching the table level in r1.  */
        switch (r1 & _ASCE_TYPE_MASK) {
        case _ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case _ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case _ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case _ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = cpu_ldq_real_ra(env, raddr, ra);
            if (!(entry & _REGION_ENTRY_INV)) {
                /* we are allowed to not store if already invalid */
                entry |= _REGION_ENTRY_INV;
                cpu_stq_real_ra(env, raddr, entry, ra);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}
1910
/* invalidate pte */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = CPU(s390_env_get_cpu(env));
    const uintptr_t ra = GETPC();
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & _SEGMENT_ENTRY_ORIGIN);
    /* Scale the page-index bits of vaddr to an 8-byte entry offset.  */
    pte_addr += (vaddr & VADDR_PX) >> 9;

    /* Mark the page table entry as invalid */
    pte = cpu_ldq_real_ra(env, pte_addr, ra);
    pte |= _PAGE_INVALID;
    cpu_stq_real_ra(env, pte_addr, pte, ra);

    /* XXX we exploit the fact that Linux passes the exact virtual
       address here - it's not obliged to! */
    /* m4 bit 0 set: local-clearing variant, flush only this CPU.  */
    if (m4 & 1) {
        if (vaddr & ~VADDR_PX) {
            tlb_flush_page(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush(cs);
        }
    } else {
        if (vaddr & ~VADDR_PX) {
            tlb_flush_page_all_cpus_synced(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush_all_cpus_synced(cs);
        }
    }
}
1951
1952/* flush local tlb */
1953void HELPER(ptlb)(CPUS390XState *env)
1954{
1955    S390CPU *cpu = s390_env_get_cpu(env);
1956
1957    tlb_flush(CPU(cpu));
1958}
1959
1960/* flush global tlb */
1961void HELPER(purge)(CPUS390XState *env)
1962{
1963    S390CPU *cpu = s390_env_get_cpu(env);
1964
1965    tlb_flush_all_cpus_synced(CPU(cpu));
1966}
1967
1968/* load using real address */
1969uint64_t HELPER(lura)(CPUS390XState *env, uint64_t addr)
1970{
1971    return cpu_ldl_real_ra(env, wrap_address(env, addr), GETPC());
1972}
1973
1974uint64_t HELPER(lurag)(CPUS390XState *env, uint64_t addr)
1975{
1976    return cpu_ldq_real_ra(env, wrap_address(env, addr), GETPC());
1977}
1978
/* store using real address */
void HELPER(stura)(CPUS390XState *env, uint64_t addr, uint64_t v1)
{
    /* 32-bit store of the low word of v1 at the wrapped real address. */
    cpu_stl_real_ra(env, wrap_address(env, addr), (uint32_t)v1, GETPC());

    /* Record a PER storage-alteration (real) event when enabled in CR9. */
    if ((env->psw.mask & PSW_MASK_PER) &&
        (env->cregs[9] & PER_CR9_EVENT_STORE) &&
        (env->cregs[9] & PER_CR9_EVENT_STORE_REAL)) {
        /* PSW is saved just before calling the helper.  */
        env->per_address = env->psw.addr;
        env->per_perc_atmid = PER_CODE_EVENT_STORE_REAL | get_per_atmid(env);
    }
}
1992
/* Store doubleword using real address; mirrors HELPER(stura).  */
void HELPER(sturg)(CPUS390XState *env, uint64_t addr, uint64_t v1)
{
    cpu_stq_real_ra(env, wrap_address(env, addr), v1, GETPC());

    /* Record a PER storage-alteration (real) event when enabled in CR9. */
    if ((env->psw.mask & PSW_MASK_PER) &&
        (env->cregs[9] & PER_CR9_EVENT_STORE) &&
        (env->cregs[9] & PER_CR9_EVENT_STORE_REAL)) {
        /* PSW is saved just before calling the helper.  */
        env->per_address = env->psw.addr;
        env->per_perc_atmid = PER_CODE_EVENT_STORE_REAL | get_per_atmid(env);
    }
}
2005
2006/* load real address */
2007uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
2008{
2009    CPUState *cs = CPU(s390_env_get_cpu(env));
2010    uint32_t cc = 0;
2011    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
2012    uint64_t ret;
2013    int old_exc, flags;
2014
2015    /* XXX incomplete - has more corner cases */
2016    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
2017        cpu_restore_state(cs, GETPC());
2018        program_interrupt(env, PGM_SPECIAL_OP, 2);
2019    }
2020
2021    old_exc = cs->exception_index;
2022    if (mmu_translate(env, addr, 0, asc, &ret, &flags, true)) {
2023        cc = 3;
2024    }
2025    if (cs->exception_index == EXCP_PGM) {
2026        ret = env->int_pgm_code | 0x80000000;
2027    } else {
2028        ret |= addr & ~TARGET_PAGE_MASK;
2029    }
2030    cs->exception_index = old_exc;
2031
2032    env->cc_op = cc;
2033    return ret;
2034}
2035#endif
2036
/* load pair from quadword: read 16 aligned bytes at ADDR, returning the
   high doubleword and leaving the low doubleword in env->retxl.  With
   PARALLEL set the access must be a single 128-bit atomic operation.  */
static uint64_t do_lpq(CPUS390XState *env, uint64_t addr, bool parallel)
{
    uintptr_t ra = GETPC();
    uint64_t hi, lo;

    if (parallel) {
#ifndef CONFIG_ATOMIC128
        /* Host lacks 128-bit atomics: restart the insn in exclusive mode.  */
        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
#else
        int mem_idx = cpu_mmu_index(env, false);
        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
        Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
        hi = int128_gethi(v);
        lo = int128_getlo(v);
#endif
    } else {
        /* The non-atomic path must still enforce 16-byte alignment.  */
        check_alignment(env, addr, 16, ra);

        hi = cpu_ldq_data_ra(env, addr + 0, ra);
        lo = cpu_ldq_data_ra(env, addr + 8, ra);
    }

    /* Second result register travels back via env->retxl.  */
    env->retxl = lo;
    return hi;
}
2063
uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
{
    /* Serial (non-atomic) LPQ; low half is delivered via env->retxl.  */
    return do_lpq(env, addr, false);
}
2068
uint64_t HELPER(lpq_parallel)(CPUS390XState *env, uint64_t addr)
{
    /* Parallel-context LPQ: the 16-byte read must be atomic.  */
    return do_lpq(env, addr, true);
}
2073
/* store pair to quadword: write HIGH then LOW as 16 aligned bytes at
   ADDR.  With PARALLEL set the store must be a single 128-bit atomic
   operation.  */
static void do_stpq(CPUS390XState *env, uint64_t addr,
                    uint64_t low, uint64_t high, bool parallel)
{
    uintptr_t ra = GETPC();

    if (parallel) {
#ifndef CONFIG_ATOMIC128
        /* Host lacks 128-bit atomics: restart the insn in exclusive mode.  */
        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
#else
        int mem_idx = cpu_mmu_index(env, false);
        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);

        Int128 v = int128_make128(low, high);
        helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
#endif
    } else {
        /* The non-atomic path must still enforce 16-byte alignment.  */
        check_alignment(env, addr, 16, ra);

        cpu_stq_data_ra(env, addr + 0, high, ra);
        cpu_stq_data_ra(env, addr + 8, low, ra);
    }
}
2097
void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
                  uint64_t low, uint64_t high)
{
    /* Serial (non-atomic) STPQ.  */
    do_stpq(env, addr, low, high, false);
}
2103
void HELPER(stpq_parallel)(CPUS390XState *env, uint64_t addr,
                           uint64_t low, uint64_t high)
{
    /* Parallel-context STPQ: the 16-byte write must be atomic.  */
    do_stpq(env, addr, low, high, true);
}
2109
/* Execute instruction.  This instruction executes an insn modified with
   the contents of r1.  It does not change the executed instruction in memory;
   it does not change the program counter.

   Perform this by recording the modified instruction in env->ex_value.
   This will be noticed by cpu_get_tb_cpu_state and thus tb translation.
*/
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    uint64_t insn = cpu_lduw_code(env, addr);
    uint8_t opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  The assembled insn ends up
       left-justified in the 64-bit value.  */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    if ((opc & 0xf0) == 0xd0) {
        /* SS-format storage ops: dispatch straight to the helper
           instead of translating a one-off TB.  */
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Decode the SS operand fields from the assembled insn.  */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            uint64_t a1 = wrap_address(env, env->regs[b1] + d1);
            uint64_t a2 = wrap_address(env, env->regs[b2] + d2);

            /* Advance past the EXECUTE insn itself.  */
            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* SUPERVISOR CALL: raise the exception directly, recording the
           EXECUTE insn's ilen for the interruption.  Does not return.  */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    env->ex_value = insn | ilen;
}
2180
/* MOVE WITH OPTIONAL SPECIFICATIONS: copy LEN bytes (capped at 4096)
   from SRC to DEST, with per-operand access keys and address spaces
   taken from the OACs in r0.  Returns cc 3 if LEN was truncated to
   4096, else cc 0.  Raises a program interrupt on any of the
   special-operation / privilege violations checked below.  */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    CPUState *cs = CPU(s390_env_get_cpu(env));
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    /* DAT must be on; otherwise the insn is a special-operation fault.  */
    if (!(env->psw.mask & PSW_MASK_DAT)) {
        cpu_restore_state(cs, ra);
        program_interrupt(env, PGM_SPECIAL_OP, 6);
    }

    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;
    dest_as = (val >> 6) & 0x3;
    dest_k = (val >> 1) & 0x1;
    dest_a = val & 0x1;

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    /* When the K (key) or A (address-space) control bit is clear, the
       corresponding value comes from the current PSW instead.  */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    /* Problem state may not explicitly select the home space.  */
    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        cpu_restore_state(cs, ra);
        program_interrupt(env, PGM_SPECIAL_OP, 6);
    }
    /* Secondary space requires the secondary-space control in CR0.  */
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        cpu_restore_state(cs, ra);
        program_interrupt(env, PGM_SPECIAL_OP, 6);
    }
    /* Both keys must be permitted by the PSW-key mask.  */
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        cpu_restore_state(cs, ra);
        program_interrupt(env, PGM_PRIVILEGED, 6);
    }

    /* Cap the copy at one page's worth; cc 3 tells the guest to retry.  */
    len = wrap_length(env, len);
    if (len > 4096) {
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        cpu_restore_state(cs, ra);
        program_interrupt(env, PGM_ADDRESSING, 6);
    }

    /* FIXME: a) LAP
     *        b) Access using correct keys
     *        c) AR-mode
     */
#ifdef CONFIG_USER_ONLY
    /* psw keys are never valid in user mode, we will never reach this */
    g_assert_not_reached();
#else
    fast_memmove_as(env, dest, src, len, dest_as, src_as, ra);
#endif

    return cc;
}
2271
/* Decode a Unicode character.  A return value < 0 indicates success, storing
   the UTF-32 result into OCHAR and the input length into OLEN.  A return
   value >= 0 indicates failure, and the CC value to be returned
   (0: source exhausted, 2: malformed input).  */
typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, bool enh_check, uintptr_t ra,
                                 uint32_t *ochar, uint32_t *olen);

/* Encode a Unicode character.  A return value < 0 indicates success, storing
   the bytes into ADDR and the output length into OLEN.  A return value >= 0
   indicates failure, and the CC value to be returned
   (1: destination exhausted).  */
typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, uintptr_t ra, uint32_t c,
                                 uint32_t *olen);
2285
2286static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2287                       bool enh_check, uintptr_t ra,
2288                       uint32_t *ochar, uint32_t *olen)
2289{
2290    uint8_t s0, s1, s2, s3;
2291    uint32_t c, l;
2292
2293    if (ilen < 1) {
2294        return 0;
2295    }
2296    s0 = cpu_ldub_data_ra(env, addr, ra);
2297    if (s0 <= 0x7f) {
2298        /* one byte character */
2299        l = 1;
2300        c = s0;
2301    } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2302        /* invalid character */
2303        return 2;
2304    } else if (s0 <= 0xdf) {
2305        /* two byte character */
2306        l = 2;
2307        if (ilen < 2) {
2308            return 0;
2309        }
2310        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2311        c = s0 & 0x1f;
2312        c = (c << 6) | (s1 & 0x3f);
2313        if (enh_check && (s1 & 0xc0) != 0x80) {
2314            return 2;
2315        }
2316    } else if (s0 <= 0xef) {
2317        /* three byte character */
2318        l = 3;
2319        if (ilen < 3) {
2320            return 0;
2321        }
2322        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2323        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2324        c = s0 & 0x0f;
2325        c = (c << 6) | (s1 & 0x3f);
2326        c = (c << 6) | (s2 & 0x3f);
2327        /* Fold the byte-by-byte range descriptions in the PoO into
2328           tests against the complete value.  It disallows encodings
2329           that could be smaller, and the UTF-16 surrogates.  */
2330        if (enh_check
2331            && ((s1 & 0xc0) != 0x80
2332                || (s2 & 0xc0) != 0x80
2333                || c < 0x1000
2334                || (c >= 0xd800 && c <= 0xdfff))) {
2335            return 2;
2336        }
2337    } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2338        /* four byte character */
2339        l = 4;
2340        if (ilen < 4) {
2341            return 0;
2342        }
2343        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2344        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2345        s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2346        c = s0 & 0x07;
2347        c = (c << 6) | (s1 & 0x3f);
2348        c = (c << 6) | (s2 & 0x3f);
2349        c = (c << 6) | (s3 & 0x3f);
2350        /* See above.  */
2351        if (enh_check
2352            && ((s1 & 0xc0) != 0x80
2353                || (s2 & 0xc0) != 0x80
2354                || (s3 & 0xc0) != 0x80
2355                || c < 0x010000
2356                || c > 0x10ffff)) {
2357            return 2;
2358        }
2359    } else {
2360        /* invalid character */
2361        return 2;
2362    }
2363
2364    *ochar = c;
2365    *olen = l;
2366    return -1;
2367}
2368
2369static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2370                        bool enh_check, uintptr_t ra,
2371                        uint32_t *ochar, uint32_t *olen)
2372{
2373    uint16_t s0, s1;
2374    uint32_t c, l;
2375
2376    if (ilen < 2) {
2377        return 0;
2378    }
2379    s0 = cpu_lduw_data_ra(env, addr, ra);
2380    if ((s0 & 0xfc00) != 0xd800) {
2381        /* one word character */
2382        l = 2;
2383        c = s0;
2384    } else {
2385        /* two word character */
2386        l = 4;
2387        if (ilen < 4) {
2388            return 0;
2389        }
2390        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2391        c = extract32(s0, 6, 4) + 1;
2392        c = (c << 6) | (s0 & 0x3f);
2393        c = (c << 10) | (s1 & 0x3ff);
2394        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2395            /* invalid surrogate character */
2396            return 2;
2397        }
2398    }
2399
2400    *ochar = c;
2401    *olen = l;
2402    return -1;
2403}
2404
2405static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2406                        bool enh_check, uintptr_t ra,
2407                        uint32_t *ochar, uint32_t *olen)
2408{
2409    uint32_t c;
2410
2411    if (ilen < 4) {
2412        return 0;
2413    }
2414    c = cpu_ldl_data_ra(env, addr, ra);
2415    if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2416        /* invalid unicode character */
2417        return 2;
2418    }
2419
2420    *ochar = c;
2421    *olen = 4;
2422    return -1;
2423}
2424
2425static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2426                       uintptr_t ra, uint32_t c, uint32_t *olen)
2427{
2428    uint8_t d[4];
2429    uint32_t l, i;
2430
2431    if (c <= 0x7f) {
2432        /* one byte character */
2433        l = 1;
2434        d[0] = c;
2435    } else if (c <= 0x7ff) {
2436        /* two byte character */
2437        l = 2;
2438        d[1] = 0x80 | extract32(c, 0, 6);
2439        d[0] = 0xc0 | extract32(c, 6, 5);
2440    } else if (c <= 0xffff) {
2441        /* three byte character */
2442        l = 3;
2443        d[2] = 0x80 | extract32(c, 0, 6);
2444        d[1] = 0x80 | extract32(c, 6, 6);
2445        d[0] = 0xe0 | extract32(c, 12, 4);
2446    } else {
2447        /* four byte character */
2448        l = 4;
2449        d[3] = 0x80 | extract32(c, 0, 6);
2450        d[2] = 0x80 | extract32(c, 6, 6);
2451        d[1] = 0x80 | extract32(c, 12, 6);
2452        d[0] = 0xf0 | extract32(c, 18, 3);
2453    }
2454
2455    if (ilen < l) {
2456        return 1;
2457    }
2458    for (i = 0; i < l; ++i) {
2459        cpu_stb_data_ra(env, addr + i, d[i], ra);
2460    }
2461
2462    *olen = l;
2463    return -1;
2464}
2465
2466static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2467                        uintptr_t ra, uint32_t c, uint32_t *olen)
2468{
2469    uint16_t d0, d1;
2470
2471    if (c <= 0xffff) {
2472        /* one word character */
2473        if (ilen < 2) {
2474            return 1;
2475        }
2476        cpu_stw_data_ra(env, addr, c, ra);
2477        *olen = 2;
2478    } else {
2479        /* two word character */
2480        if (ilen < 4) {
2481            return 1;
2482        }
2483        d1 = 0xdc00 | extract32(c, 0, 10);
2484        d0 = 0xd800 | extract32(c, 10, 6);
2485        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
2486        cpu_stw_data_ra(env, addr + 0, d0, ra);
2487        cpu_stw_data_ra(env, addr + 2, d1, ra);
2488        *olen = 4;
2489    }
2490
2491    return -1;
2492}
2493
2494static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2495                        uintptr_t ra, uint32_t c, uint32_t *olen)
2496{
2497    if (ilen < 4) {
2498        return 1;
2499    }
2500    cpu_stl_data_ra(env, addr, c, ra);
2501    *olen = 4;
2502    return -1;
2503}
2504
/* Common driver for the CUxy conversion insns: repeatedly decode a
   character from the R2 operand and re-encode it into the R1 operand,
   advancing both address/length register pairs as it goes.  Returns the
   condition code: 0 (source exhausted) or 2 (invalid source) from the
   decoder, 1 (destination full) from the encoder, or 3 when the
   iteration cap was reached with work remaining.  */
static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
                                       uint32_t r2, uint32_t m3, uintptr_t ra,
                                       decode_unicode_fn decode,
                                       encode_unicode_fn encode)
{
    uint64_t dst = get_address(env, r1);
    uint64_t dlen = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint64_t slen = get_length(env, r2 + 1);
    bool enh_check = m3 & 1;   /* M3 bit 1 enables well-formedness checks */
    int cc, i;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 256.  */
    for (i = 0; i < 256; ++i) {
        uint32_t c, ilen, olen;

        /* A non-negative return is the final condition code.  */
        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
        if (unlikely(cc >= 0)) {
            break;
        }
        cc = encode(env, dst, dlen, ra, c, &olen);
        if (unlikely(cc >= 0)) {
            break;
        }

        /* Consume the input and output for this character.  */
        src += ilen;
        slen -= ilen;
        dst += olen;
        dlen -= olen;
        cc = 3;
    }

    /* Write the updated cursors back to the register pairs.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, dlen);
    set_address(env, r2, src);
    set_length(env, r2 + 1, slen);

    return cc;
}
2545
/* CU12: CONVERT UTF-8 TO UTF-16.  */
uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf16);
}
2551
/* CU14: CONVERT UTF-8 TO UTF-32.  */
uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf32);
}
2557
/* CU21: CONVERT UTF-16 TO UTF-8.  */
uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf8);
}
2563
/* CU24: CONVERT UTF-16 TO UTF-32.  */
uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf32);
}
2569
/* CU41: CONVERT UTF-32 TO UTF-8.  */
uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf8);
}
2575
/* CU42: CONVERT UTF-32 TO UTF-16.  */
uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf16);
}
2581