/* qemu/target/s390x/mem_helper.c */
/*
 *  S/390 memory access helper routines
 *
 *  Copyright (c) 2009 Ulrich Hecht
 *  Copyright (c) 2009 Alexander Graf
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
  20
  21#include "qemu/osdep.h"
  22#include "cpu.h"
  23#include "exec/address-spaces.h"
  24#include "exec/helper-proto.h"
  25#include "exec/exec-all.h"
  26#include "exec/cpu_ldst.h"
  27#include "qemu/int128.h"
  28
  29#if !defined(CONFIG_USER_ONLY)
  30#include "hw/s390x/storage-keys.h"
  31#endif
  32
/*****************************************************************************/
/* Softmmu support */
  35#if !defined(CONFIG_USER_ONLY)
  36
/* try to fill the TLB and return an exception if error. If retaddr is
   NULL, it means that the function was called in C code (i.e. not
   from generated code or from helper.c) */
/* XXX: fix it to restore all registers */
  41void tlb_fill(CPUState *cs, target_ulong addr, MMUAccessType access_type,
  42              int mmu_idx, uintptr_t retaddr)
  43{
  44    int ret = s390_cpu_handle_mmu_fault(cs, addr, access_type, mmu_idx);
  45    if (unlikely(ret != 0)) {
  46        cpu_loop_exit_restore(cs, retaddr);
  47    }
  48}
  49
  50#endif
  51
  52/* #define DEBUG_HELPER */
  53#ifdef DEBUG_HELPER
  54#define HELPER_LOG(x...) qemu_log(x)
  55#else
  56#define HELPER_LOG(x...)
  57#endif
  58
  59/* Reduce the length so that addr + len doesn't cross a page boundary.  */
  60static inline uint32_t adj_len_to_page(uint32_t len, uint64_t addr)
  61{
  62#ifndef CONFIG_USER_ONLY
  63    if ((addr & ~TARGET_PAGE_MASK) + len - 1 >= TARGET_PAGE_SIZE) {
  64        return -(addr | TARGET_PAGE_MASK);
  65    }
  66#endif
  67    return len;
  68}
  69
  70/* Trigger a SPECIFICATION exception if an address or a length is not
  71   naturally aligned.  */
  72static inline void check_alignment(CPUS390XState *env, uint64_t v,
  73                                   int wordsize, uintptr_t ra)
  74{
  75    if (v % wordsize) {
  76        CPUState *cs = CPU(s390_env_get_cpu(env));
  77        cpu_restore_state(cs, ra);
  78        program_interrupt(env, PGM_SPECIFICATION, 6);
  79    }
  80}
  81
  82/* Load a value from memory according to its size.  */
  83static inline uint64_t cpu_ldusize_data_ra(CPUS390XState *env, uint64_t addr,
  84                                           int wordsize, uintptr_t ra)
  85{
  86    switch (wordsize) {
  87    case 1:
  88        return cpu_ldub_data_ra(env, addr, ra);
  89    case 2:
  90        return cpu_lduw_data_ra(env, addr, ra);
  91    default:
  92        abort();
  93    }
  94}
  95
  96/* Store a to memory according to its size.  */
  97static inline void cpu_stsize_data_ra(CPUS390XState *env, uint64_t addr,
  98                                      uint64_t value, int wordsize,
  99                                      uintptr_t ra)
 100{
 101    switch (wordsize) {
 102    case 1:
 103        cpu_stb_data_ra(env, addr, value, ra);
 104        break;
 105    case 2:
 106        cpu_stw_data_ra(env, addr, value, ra);
 107        break;
 108    default:
 109        abort();
 110    }
 111}
 112
 113static inline uint64_t wrap_address(CPUS390XState *env, uint64_t a)
 114{
 115    if (!(env->psw.mask & PSW_MASK_64)) {
 116        if (!(env->psw.mask & PSW_MASK_32)) {
 117            /* 24-Bit mode */
 118            a &= 0x00ffffff;
 119        } else {
 120            /* 31-Bit mode */
 121            a &= 0x7fffffff;
 122        }
 123    }
 124    return a;
 125}
 126
 127static void fast_memset(CPUS390XState *env, uint64_t dest, uint8_t byte,
 128                        uint32_t l, uintptr_t ra)
 129{
 130    int mmu_idx = cpu_mmu_index(env, false);
 131
 132    while (l > 0) {
 133        void *p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, mmu_idx);
 134        if (p) {
 135            /* Access to the whole page in write mode granted.  */
 136            uint32_t l_adj = adj_len_to_page(l, dest);
 137            memset(p, byte, l_adj);
 138            dest += l_adj;
 139            l -= l_adj;
 140        } else {
 141            /* We failed to get access to the whole page. The next write
 142               access will likely fill the QEMU TLB for the next iteration.  */
 143            cpu_stb_data_ra(env, dest, byte, ra);
 144            dest++;
 145            l--;
 146        }
 147    }
 148}
 149
 150#ifndef CONFIG_USER_ONLY
/* Copy LEN bytes from SRC to DEST, using possibly distinct MMU indexes
   for the source and destination accesses.  Uses direct host-page
   copies when both pages are addressable, otherwise falls back to
   byte-wise MMU helper accesses.  */
static void fast_memmove_idx(CPUS390XState *env, uint64_t dest, uint64_t src,
                             uint32_t len, int dest_idx, int src_idx,
                             uintptr_t ra)
{
    /* Memop descriptors for the byte-wise slow path below.  */
    TCGMemOpIdx oi_dest = make_memop_idx(MO_UB, dest_idx);
    TCGMemOpIdx oi_src = make_memop_idx(MO_UB, src_idx);
    uint32_t len_adj;
    void *src_p;
    void *dest_p;
    uint8_t x;

    while (len > 0) {
        /* Re-wrap each iteration in case an address crossed the
           addressing-mode boundary.  */
        src = wrap_address(env, src);
        dest = wrap_address(env, dest);
        src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, src_idx);
        dest_p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, dest_idx);

        if (src_p && dest_p) {
            /* Access to both whole pages granted.  */
            len_adj = adj_len_to_page(adj_len_to_page(len, src), dest);
            memmove(dest_p, src_p, len_adj);
        } else {
            /* We failed to get access to one or both whole pages. The next
               read or write access will likely fill the QEMU TLB for the
               next iteration.  */
            len_adj = 1;
            x = helper_ret_ldub_mmu(env, src, oi_src, ra);
            helper_ret_stb_mmu(env, dest, x, oi_dest, ra);
        }
        src += len_adj;
        dest += len_adj;
        len -= len_adj;
    }
}
 185
 186static int mmu_idx_from_as(uint8_t as)
 187{
 188    switch (as) {
 189    case AS_PRIMARY:
 190        return MMU_PRIMARY_IDX;
 191    case AS_SECONDARY:
 192        return MMU_SECONDARY_IDX;
 193    case AS_HOME:
 194        return MMU_HOME_IDX;
 195    default:
 196        /* FIXME AS_ACCREG */
 197        g_assert_not_reached();
 198    }
 199}
 200
 201static void fast_memmove_as(CPUS390XState *env, uint64_t dest, uint64_t src,
 202                            uint32_t len, uint8_t dest_as, uint8_t src_as,
 203                            uintptr_t ra)
 204{
 205    int src_idx = mmu_idx_from_as(src_as);
 206    int dest_idx = mmu_idx_from_as(dest_as);
 207
 208    fast_memmove_idx(env, dest, src, len, dest_idx, src_idx, ra);
 209}
 210#endif
 211
/* Copy L bytes from SRC to DEST within the current address space,
   going page-by-page through host memory when both pages are directly
   addressable.  */
static void fast_memmove(CPUS390XState *env, uint64_t dest, uint64_t src,
                         uint32_t l, uintptr_t ra)
{
    int mmu_idx = cpu_mmu_index(env, false);

    while (l > 0) {
        void *src_p = tlb_vaddr_to_host(env, src, MMU_DATA_LOAD, mmu_idx);
        void *dest_p = tlb_vaddr_to_host(env, dest, MMU_DATA_STORE, mmu_idx);
        if (src_p && dest_p) {
            /* Access to both whole pages granted.  */
            uint32_t l_adj = adj_len_to_page(l, src);
            l_adj = adj_len_to_page(l_adj, dest);
            memmove(dest_p, src_p, l_adj);
            src += l_adj;
            dest += l_adj;
            l -= l_adj;
        } else {
            /* We failed to get access to one or both whole pages. The next
               read or write access will likely fill the QEMU TLB for the
               next iteration.  */
            cpu_stb_data_ra(env, dest, cpu_ldub_data_ra(env, src, ra), ra);
            src++;
            dest++;
            l--;
        }
    }
}
 239
 240/* and on array */
 241static uint32_t do_helper_nc(CPUS390XState *env, uint32_t l, uint64_t dest,
 242                             uint64_t src, uintptr_t ra)
 243{
 244    uint32_t i;
 245    uint8_t c = 0;
 246
 247    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
 248               __func__, l, dest, src);
 249
 250    for (i = 0; i <= l; i++) {
 251        uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
 252        x &= cpu_ldub_data_ra(env, dest + i, ra);
 253        c |= x;
 254        cpu_stb_data_ra(env, dest + i, x, ra);
 255    }
 256    return c != 0;
 257}
 258
/* NC: AND two byte arrays in place; returns 0 if all result bytes were
   zero, 1 otherwise (the condition code).  */
uint32_t HELPER(nc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_nc(env, l, dest, src, GETPC());
}
 264
 265/* xor on array */
 266static uint32_t do_helper_xc(CPUS390XState *env, uint32_t l, uint64_t dest,
 267                             uint64_t src, uintptr_t ra)
 268{
 269    uint32_t i;
 270    uint8_t c = 0;
 271
 272    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
 273               __func__, l, dest, src);
 274
 275    /* xor with itself is the same as memset(0) */
 276    if (src == dest) {
 277        fast_memset(env, dest, 0, l + 1, ra);
 278        return 0;
 279    }
 280
 281    for (i = 0; i <= l; i++) {
 282        uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
 283        x ^= cpu_ldub_data_ra(env, dest + i, ra);
 284        c |= x;
 285        cpu_stb_data_ra(env, dest + i, x, ra);
 286    }
 287    return c != 0;
 288}
 289
/* XC: XOR two byte arrays in place; returns 0 if all result bytes were
   zero, 1 otherwise (the condition code).  */
uint32_t HELPER(xc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_xc(env, l, dest, src, GETPC());
}
 295
 296/* or on array */
 297static uint32_t do_helper_oc(CPUS390XState *env, uint32_t l, uint64_t dest,
 298                             uint64_t src, uintptr_t ra)
 299{
 300    uint32_t i;
 301    uint8_t c = 0;
 302
 303    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
 304               __func__, l, dest, src);
 305
 306    for (i = 0; i <= l; i++) {
 307        uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
 308        x |= cpu_ldub_data_ra(env, dest + i, ra);
 309        c |= x;
 310        cpu_stb_data_ra(env, dest + i, x, ra);
 311    }
 312    return c != 0;
 313}
 314
/* OC: OR two byte arrays in place; returns 0 if all result bytes were
   zero, 1 otherwise (the condition code).  */
uint32_t HELPER(oc)(CPUS390XState *env, uint32_t l, uint64_t dest,
                    uint64_t src)
{
    return do_helper_oc(env, l, dest, src, GETPC());
}
 320
/* memmove */
static uint32_t do_helper_mvc(CPUS390XState *env, uint32_t l, uint64_t dest,
                              uint64_t src, uintptr_t ra)
{
    uint32_t i;

    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
               __func__, l, dest, src);

    /* mvc and memmove do not behave the same when areas overlap! */
    /* mvc with source pointing to the byte after the destination is the
       same as memset with the first source byte */
    if (dest == src + 1) {
        fast_memset(env, dest, cpu_ldub_data_ra(env, src, ra), l + 1, ra);
    } else if (dest < src || src + l < dest) {
        /* Non-destructive overlap (or none at all): bulk copy is safe.  */
        fast_memmove(env, dest, src, l + 1, ra);
    } else {
        /* slow version with byte accesses which always work */
        for (i = 0; i <= l; i++) {
            uint8_t x = cpu_ldub_data_ra(env, src + i, ra);
            cpu_stb_data_ra(env, dest + i, x, ra);
        }
    }

    /* MVC does not change the CC; pass the current value through.  */
    return env->cc_op;
}
 347
/* MVC: copy l + 1 bytes from src to dest with MVC overlap semantics.  */
void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    do_helper_mvc(env, l, dest, src, GETPC());
}
 352
 353/* move inverse  */
 354void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 355{
 356    uintptr_t ra = GETPC();
 357    int i;
 358
 359    for (i = 0; i <= l; i++) {
 360        uint8_t v = cpu_ldub_data_ra(env, src - i, ra);
 361        cpu_stb_data_ra(env, dest + i, v, ra);
 362    }
 363}
 364
 365/* move numerics  */
 366void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 367{
 368    uintptr_t ra = GETPC();
 369    int i;
 370
 371    for (i = 0; i <= l; i++) {
 372        uint8_t v = cpu_ldub_data_ra(env, dest + i, ra) & 0xf0;
 373        v |= cpu_ldub_data_ra(env, src + i, ra) & 0x0f;
 374        cpu_stb_data_ra(env, dest + i, v, ra);
 375    }
 376}
 377
/* move with offset  */
void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    /* L packs two 4-bit operand lengths (each length minus one).  */
    int len_dest = l >> 4;
    int len_src = l & 0xf;
    uint8_t byte_dest, byte_src;
    int i;

    /* Start at the rightmost byte of each operand.  */
    src += len_src;
    dest += len_dest;

    /* Handle rightmost byte */
    byte_src = cpu_ldub_data_ra(env, src, ra);
    byte_dest = cpu_ldub_data_ra(env, dest, ra);
    /* Keep the destination's low nibble, shift the source in above it.  */
    byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
    cpu_stb_data_ra(env, dest, byte_dest, ra);

    /* Process remaining bytes from right to left */
    for (i = 1; i <= len_dest; i++) {
        /* Carry the high nibble of the previous source byte down.  */
        byte_dest = byte_src >> 4;
        if (len_src - i >= 0) {
            byte_src = cpu_ldub_data_ra(env, src - i, ra);
        } else {
            /* Source exhausted: pad with zero nibbles.  */
            byte_src = 0;
        }
        byte_dest |= byte_src << 4;
        cpu_stb_data_ra(env, dest - i, byte_dest, ra);
    }
}
 408
 409/* move zones  */
 410void HELPER(mvz)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
 411{
 412    uintptr_t ra = GETPC();
 413    int i;
 414
 415    for (i = 0; i <= l; i++) {
 416        uint8_t b = cpu_ldub_data_ra(env, dest + i, ra) & 0x0f;
 417        b |= cpu_ldub_data_ra(env, src + i, ra) & 0xf0;
 418        cpu_stb_data_ra(env, dest + i, b, ra);
 419    }
 420}
 421
 422/* compare unsigned byte arrays */
 423static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
 424                              uint64_t s2, uintptr_t ra)
 425{
 426    uint32_t i;
 427    uint32_t cc = 0;
 428
 429    HELPER_LOG("%s l %d s1 %" PRIx64 " s2 %" PRIx64 "\n",
 430               __func__, l, s1, s2);
 431
 432    for (i = 0; i <= l; i++) {
 433        uint8_t x = cpu_ldub_data_ra(env, s1 + i, ra);
 434        uint8_t y = cpu_ldub_data_ra(env, s2 + i, ra);
 435        HELPER_LOG("%02x (%c)/%02x (%c) ", x, x, y, y);
 436        if (x < y) {
 437            cc = 1;
 438            break;
 439        } else if (x > y) {
 440            cc = 2;
 441            break;
 442        }
 443    }
 444
 445    HELPER_LOG("\n");
 446    return cc;
 447}
 448
/* CLC: compare l + 1 bytes of two operands; returns the CC.  */
uint32_t HELPER(clc)(CPUS390XState *env, uint32_t l, uint64_t s1, uint64_t s2)
{
    return do_helper_clc(env, l, s1, s2, GETPC());
}
 453
/* compare logical under mask */
uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, uint32_t mask,
                     uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint32_t cc = 0;

    HELPER_LOG("%s: r1 0x%x mask 0x%x addr 0x%" PRIx64 "\n", __func__, r1,
               mask, addr);

    /* Walk the 4-bit mask from its MSB; each set bit selects the
       current high byte of R1 to compare against the next memory
       byte.  R1 is shifted left as the mask advances.  */
    while (mask) {
        if (mask & 8) {
            uint8_t d = cpu_ldub_data_ra(env, addr, ra);
            uint8_t r = extract32(r1, 24, 8);
            HELPER_LOG("mask 0x%x %02x/%02x (0x%" PRIx64 ") ", mask, r, d,
                       addr);
            if (r < d) {
                cc = 1;
                break;
            } else if (r > d) {
                cc = 2;
                break;
            }
            /* Only advance in memory for selected bytes.  */
            addr++;
        }
        mask = (mask << 1) & 0xf;
        r1 <<= 8;
    }

    HELPER_LOG("\n");
    return cc;
}
 486
/* Read REG as an address, wrapped to the current addressing mode.  */
static inline uint64_t get_address(CPUS390XState *env, int reg)
{
    return wrap_address(env, env->regs[reg]);
}
 491
/* Write ADDRESS back to REG, honoring the current addressing mode's
   rules for which high bits of the register are preserved.  */
static inline void set_address(CPUS390XState *env, int reg, uint64_t address)
{
    if (env->psw.mask & PSW_MASK_64) {
        /* 64-Bit mode */
        env->regs[reg] = address;
    } else {
        if (!(env->psw.mask & PSW_MASK_32)) {
            /* 24-Bit mode. According to the PoO it is implementation
            dependent if bits 32-39 remain unchanged or are set to
            zeros.  Choose the former so that the function can also be
            used for TRT.  */
            env->regs[reg] = deposit64(env->regs[reg], 0, 24, address);
        } else {
            /* 31-Bit mode. According to the PoO it is implementation
            dependent if bit 32 remains unchanged or is set to zero.
            Choose the latter so that the function can also be used for
            TRT.  */
            address &= 0x7fffffff;
            env->regs[reg] = deposit64(env->regs[reg], 0, 32, address);
        }
    }
}
 514
 515static inline uint64_t wrap_length(CPUS390XState *env, uint64_t length)
 516{
 517    if (!(env->psw.mask & PSW_MASK_64)) {
 518        /* 24-Bit and 31-Bit mode */
 519        length &= 0x7fffffff;
 520    }
 521    return length;
 522}
 523
/* Read REG as an operand length, truncated per the addressing mode.  */
static inline uint64_t get_length(CPUS390XState *env, int reg)
{
    return wrap_length(env, env->regs[reg]);
}
 528
 529static inline void set_length(CPUS390XState *env, int reg, uint64_t length)
 530{
 531    if (env->psw.mask & PSW_MASK_64) {
 532        /* 64-Bit mode */
 533        env->regs[reg] = length;
 534    } else {
 535        /* 24-Bit and 31-Bit mode */
 536        env->regs[reg] = deposit64(env->regs[reg], 0, 32, length);
 537    }
 538}
 539
/* search string (c is byte to search, r2 is string, r1 end of string) */
void HELPER(srst)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint64_t end, str;
    uint32_t len;
    uint8_t v, c = env->regs[0]; /* search character: low byte of r0 */

    /* Bits 32-55 must contain all 0.  */
    if (env->regs[0] & 0xffffff00u) {
        cpu_restore_state(ENV_GET_CPU(env), ra);
        program_interrupt(env, PGM_SPECIFICATION, 6);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        if (str + len == end) {
            /* Character not found.  R1 & R2 are unmodified.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_ldub_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
 578
/* search string unicode: like SRST but operating on halfwords (c is
   the halfword to search for, r2 the string, r1 its end address) */
void HELPER(srstu)(CPUS390XState *env, uint32_t r1, uint32_t r2)
{
    uintptr_t ra = GETPC();
    uint32_t len;
    uint16_t v, c = env->regs[0]; /* search char: low halfword of r0 */
    uint64_t end, str, adj_end;

    /* Bits 32-47 of R0 must be zero.  */
    if (env->regs[0] & 0xffff0000u) {
        cpu_restore_state(ENV_GET_CPU(env), ra);
        program_interrupt(env, PGM_SPECIFICATION, 6);
    }

    str = get_address(env, r2);
    end = get_address(env, r1);

    /* If the LSB of the two addresses differ, use one extra byte.  */
    adj_end = end + ((str ^ end) & 1);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; len += 2) {
        if (str + len == adj_end) {
            /* End of input found.  */
            env->cc_op = 2;
            return;
        }
        v = cpu_lduw_data_ra(env, str + len, ra);
        if (v == c) {
            /* Character found.  Set R1 to the location; R2 is unmodified.  */
            env->cc_op = 1;
            set_address(env, r1, str + len);
            return;
        }
    }

    /* CPU-determined bytes processed.  Advance R2 to next byte to process.  */
    env->cc_op = 3;
    set_address(env, r2, str + len);
}
 619
/* unsigned string compare (c is string terminator) */
/* Returns the updated S1 in the return value and the updated S2 in
   env->retxl; the CC is left in env->cc_op.  */
uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t s2)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    /* Only the low byte of C is the terminator.  */
    c = c & 0xff;
    s1 = wrap_address(env, s1);
    s2 = wrap_address(env, s2);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v1 = cpu_ldub_data_ra(env, s1 + len, ra);
        uint8_t v2 = cpu_ldub_data_ra(env, s2 + len, ra);
        if (v1 == v2) {
            if (v1 == c) {
                /* Equal.  CC=0, and don't advance the registers.  */
                env->cc_op = 0;
                env->retxl = s2;
                return s1;
            }
        } else {
            /* Unequal.  CC={1,2}, and advance the registers.  Note that
               the terminator need not be zero, but the string that contains
               the terminator is by definition "low".  */
            env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2);
            env->retxl = s2 + len;
            return s1 + len;
        }
    }

    /* CPU-determined bytes equal; advance the registers.  */
    env->cc_op = 3;
    env->retxl = s2 + len;
    return s1 + len;
}
 657
/* move page */
/* Copies one full target page from r2 to r1; always reports success.  */
uint32_t HELPER(mvpg)(CPUS390XState *env, uint64_t r0, uint64_t r1, uint64_t r2)
{
    /* ??? missing r0 handling, which includes access keys, but more
       importantly optional suppression of the exception!  */
    fast_memmove(env, r1, r2, TARGET_PAGE_SIZE, GETPC());
    return 0; /* data moved */
}
 666
/* string copy (c is string terminator) */
/* Returns the updated D; the updated S is placed in env->retxl and the
   CC in env->cc_op.  */
uint64_t HELPER(mvst)(CPUS390XState *env, uint64_t c, uint64_t d, uint64_t s)
{
    uintptr_t ra = GETPC();
    uint32_t len;

    /* Only the low byte of C is the terminator.  */
    c = c & 0xff;
    d = wrap_address(env, d);
    s = wrap_address(env, s);

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    for (len = 0; len < 0x2000; ++len) {
        uint8_t v = cpu_ldub_data_ra(env, s + len, ra);
        cpu_stb_data_ra(env, d + len, v, ra);
        if (v == c) {
            /* Complete.  Set CC=1 and advance R1.  */
            env->cc_op = 1;
            env->retxl = s;
            return d + len;
        }
    }

    /* Incomplete.  Set CC=3 and signal to advance R1 and R2.  */
    env->cc_op = 3;
    env->retxl = s + len;
    return d + len;
}
 695
 696/* load access registers r1 to r3 from memory at a2 */
 697void HELPER(lam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
 698{
 699    uintptr_t ra = GETPC();
 700    int i;
 701
 702    for (i = r1;; i = (i + 1) % 16) {
 703        env->aregs[i] = cpu_ldl_data_ra(env, a2, ra);
 704        a2 += 4;
 705
 706        if (i == r3) {
 707            break;
 708        }
 709    }
 710}
 711
 712/* store access registers r1 to r3 in memory at a2 */
 713void HELPER(stam)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
 714{
 715    uintptr_t ra = GETPC();
 716    int i;
 717
 718    for (i = r1;; i = (i + 1) % 16) {
 719        cpu_stl_data_ra(env, a2, env->aregs[i], ra);
 720        a2 += 4;
 721
 722        if (i == r3) {
 723            break;
 724        }
 725    }
 726}
 727
/* move long helper */
/* Copy MIN(*srclen, *destlen) bytes from *src to *dest, then pad the
   rest of the destination with PAD (bytes for wordsize 1, halfwords for
   wordsize 2).  All four address/length operands are updated in place.
   Returns the CC derived from the original length relation.  */
static inline uint32_t do_mvcl(CPUS390XState *env,
                               uint64_t *dest, uint64_t *destlen,
                               uint64_t *src, uint64_t *srclen,
                               uint16_t pad, int wordsize, uintptr_t ra)
{
    uint64_t len = MIN(*srclen, *destlen);
    uint32_t cc;

    /* CC reflects the relation of the original operand lengths.  */
    if (*destlen == *srclen) {
        cc = 0;
    } else if (*destlen < *srclen) {
        cc = 1;
    } else {
        cc = 2;
    }

    /* Copy the src array */
    fast_memmove(env, *dest, *src, len, ra);
    *src += len;
    *srclen -= len;
    *dest += len;
    *destlen -= len;

    /* Pad the remaining area */
    if (wordsize == 1) {
        fast_memset(env, *dest, pad, *destlen, ra);
        *dest += *destlen;
        *destlen = 0;
    } else {
        /* If remaining length is odd, pad with odd byte first.  */
        if (*destlen & 1) {
            cpu_stb_data_ra(env, *dest, pad & 0xff, ra);
            *dest += 1;
            *destlen -= 1;
        }
        /* The remaining length is even, pad using words.  */
        for (; *destlen; *dest += 2, *destlen -= 2) {
            cpu_stw_data_ra(env, *dest, pad, ra);
        }
    }

    return cc;
}
 772
 773/* move long */
 774uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
 775{
 776    uintptr_t ra = GETPC();
 777    uint64_t destlen = env->regs[r1 + 1] & 0xffffff;
 778    uint64_t dest = get_address(env, r1);
 779    uint64_t srclen = env->regs[r2 + 1] & 0xffffff;
 780    uint64_t src = get_address(env, r2);
 781    uint8_t pad = env->regs[r2 + 1] >> 24;
 782    uint32_t cc;
 783
 784    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
 785
 786    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
 787    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
 788    set_address(env, r1, dest);
 789    set_address(env, r2, src);
 790
 791    return cc;
 792}
 793
 794/* move long extended */
 795uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
 796                       uint32_t r3)
 797{
 798    uintptr_t ra = GETPC();
 799    uint64_t destlen = get_length(env, r1 + 1);
 800    uint64_t dest = get_address(env, r1);
 801    uint64_t srclen = get_length(env, r3 + 1);
 802    uint64_t src = get_address(env, r3);
 803    uint8_t pad = a2;
 804    uint32_t cc;
 805
 806    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 1, ra);
 807
 808    set_length(env, r1 + 1, destlen);
 809    set_length(env, r3 + 1, srclen);
 810    set_address(env, r1, dest);
 811    set_address(env, r3, src);
 812
 813    return cc;
 814}
 815
 816/* move long unicode */
 817uint32_t HELPER(mvclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
 818                       uint32_t r3)
 819{
 820    uintptr_t ra = GETPC();
 821    uint64_t destlen = get_length(env, r1 + 1);
 822    uint64_t dest = get_address(env, r1);
 823    uint64_t srclen = get_length(env, r3 + 1);
 824    uint64_t src = get_address(env, r3);
 825    uint16_t pad = a2;
 826    uint32_t cc;
 827
 828    cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, 2, ra);
 829
 830    set_length(env, r1 + 1, destlen);
 831    set_length(env, r3 + 1, srclen);
 832    set_address(env, r1, dest);
 833    set_address(env, r3, src);
 834
 835    return cc;
 836}
 837
/* compare logical long helper */
/* Compare the two operands element-by-element (wordsize 1 or 2),
   extending the shorter one with PAD.  At most LIMIT bytes are
   processed; if the limit is hit first, CC 3 is returned so the caller
   can be re-driven.  Addresses and lengths are updated in place up to
   (not including) the first mismatching element.  */
static inline uint32_t do_clcl(CPUS390XState *env,
                               uint64_t *src1, uint64_t *src1len,
                               uint64_t *src3, uint64_t *src3len,
                               uint16_t pad, uint64_t limit,
                               int wordsize, uintptr_t ra)
{
    uint64_t len = MAX(*src1len, *src3len);
    uint32_t cc = 0;

    /* Both lengths must be multiples of the element size.  */
    check_alignment(env, *src1len | *src3len, wordsize, ra);

    if (!len) {
        return cc;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  */
    if (len > limit) {
        len = limit;
        cc = 3;
    }

    for (; len; len -= wordsize) {
        /* An exhausted operand reads as the pad element.  */
        uint16_t v1 = pad;
        uint16_t v3 = pad;

        if (*src1len) {
            v1 = cpu_ldusize_data_ra(env, *src1, wordsize, ra);
        }
        if (*src3len) {
            v3 = cpu_ldusize_data_ra(env, *src3, wordsize, ra);
        }

        if (v1 != v3) {
            cc = (v1 < v3) ? 1 : 2;
            break;
        }

        if (*src1len) {
            *src1 += wordsize;
            *src1len -= wordsize;
        }
        if (*src3len) {
            *src3 += wordsize;
            *src3len -= wordsize;
        }
    }

    return cc;
}
 889
 890
 891/* compare logical long */
 892uint32_t HELPER(clcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
 893{
 894    uintptr_t ra = GETPC();
 895    uint64_t src1len = extract64(env->regs[r1 + 1], 0, 24);
 896    uint64_t src1 = get_address(env, r1);
 897    uint64_t src3len = extract64(env->regs[r2 + 1], 0, 24);
 898    uint64_t src3 = get_address(env, r2);
 899    uint8_t pad = env->regs[r2 + 1] >> 24;
 900    uint32_t cc;
 901
 902    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, -1, 1, ra);
 903
 904    env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, src1len);
 905    env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, src3len);
 906    set_address(env, r1, src1);
 907    set_address(env, r2, src3);
 908
 909    return cc;
 910}
 911
 912/* compare logical long extended memcompare insn with padding */
 913uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
 914                       uint32_t r3)
 915{
 916    uintptr_t ra = GETPC();
 917    uint64_t src1len = get_length(env, r1 + 1);
 918    uint64_t src1 = get_address(env, r1);
 919    uint64_t src3len = get_length(env, r3 + 1);
 920    uint64_t src3 = get_address(env, r3);
 921    uint8_t pad = a2;
 922    uint32_t cc;
 923
 924    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x2000, 1, ra);
 925
 926    set_length(env, r1 + 1, src1len);
 927    set_length(env, r3 + 1, src3len);
 928    set_address(env, r1, src1);
 929    set_address(env, r3, src3);
 930
 931    return cc;
 932}
 933
 934/* compare logical long unicode memcompare insn with padding */
 935uint32_t HELPER(clclu)(CPUS390XState *env, uint32_t r1, uint64_t a2,
 936                       uint32_t r3)
 937{
 938    uintptr_t ra = GETPC();
 939    uint64_t src1len = get_length(env, r1 + 1);
 940    uint64_t src1 = get_address(env, r1);
 941    uint64_t src3len = get_length(env, r3 + 1);
 942    uint64_t src3 = get_address(env, r3);
 943    uint16_t pad = a2;
 944    uint32_t cc = 0;
 945
 946    cc = do_clcl(env, &src1, &src1len, &src3, &src3len, pad, 0x1000, 2, ra);
 947
 948    set_length(env, r1 + 1, src1len);
 949    set_length(env, r3 + 1, src3len);
 950    set_address(env, r1, src1);
 951    set_address(env, r3, src3);
 952
 953    return cc;
 954}
 955
/* checksum */
/* Compute a 32-bit ones'-complement-folded checksum over up to 0x2000
   bytes starting at SRC, seeded with the low 32 bits of R1.  Returns
   the number of bytes processed; the checksum is left in env->retxl
   and the CC (0 done / 3 partial) in env->cc_op.  */
uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
                      uint64_t src, uint64_t src_len)
{
    uintptr_t ra = GETPC();
    uint64_t max_len, len;
    uint64_t cksm = (uint32_t)r1;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    max_len = (src_len > 0x2000 ? 0x2000 : src_len);

    /* Process full words as available.  */
    for (len = 0; len + 4 <= max_len; len += 4, src += 4) {
        cksm += (uint32_t)cpu_ldl_data_ra(env, src, ra);
    }

    /* Handle the 1-3 trailing bytes as a zero-padded final word.  */
    switch (max_len - len) {
    case 1:
        cksm += cpu_ldub_data_ra(env, src, ra) << 24;
        len += 1;
        break;
    case 2:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        len += 2;
        break;
    case 3:
        cksm += cpu_lduw_data_ra(env, src, ra) << 16;
        cksm += cpu_ldub_data_ra(env, src + 2, ra) << 8;
        len += 3;
        break;
    }

    /* Fold the carry from the checksum.  Note that we can see carry-out
       during folding more than once (but probably not more than twice).  */
    while (cksm > 0xffffffffull) {
        cksm = (uint32_t)cksm + (cksm >> 32);
    }

    /* Indicate whether or not we've processed everything.  */
    env->cc_op = (len == src_len ? 0 : 3);

    /* Return both cksm and processed length.  */
    env->retxl = cksm;
    return len;
}
1002
/* PACK: convert the zoned-decimal source into packed decimal at dest.
   len carries both 4-bit length codes: len >> 4 is the destination
   length, len & 0xf the source length.  */
void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;

    /* The operands are processed right to left; point at the last byte.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pack every value */
    while (len_dest >= 0) {
        b = 0;

        /* Low nibble of the output byte: digit of the next source byte.  */
        if (len_src > 0) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src--;
            len_src--;
        }
        /* High nibble: digit of the byte before that (the uint8_t
           truncation of the shift keeps only its low nibble).  Once the
           source is exhausted the remaining output bytes are zero.  */
        if (len_src > 0) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src--;
            len_src--;
        }

        len_dest--;
        dest--;
        cpu_stb_data_ra(env, dest, b, ra);
    }
}
1039
/* Common body for PKA (ssize = 1) and PKU (ssize = 2): pack the low
   nibble of source elements into a 16-byte packed-decimal result,
   right to left, with a positive sign code (0xc) in the low nibble of
   the rightmost result byte.  */
static inline void do_pkau(CPUS390XState *env, uint64_t dest, uint64_t src,
                           uint32_t srclen, int ssize, uintptr_t ra)
{
    int i;
    /* The destination operand is always 16 bytes long.  */
    const int destlen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - 1;

    for (i = 0; i < destlen; i++) {
        uint8_t b = 0;

        /* Start with a positive sign */
        if (i == 0) {
            b = 0xc;
        } else if (srclen > ssize) {
            b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
            src -= ssize;
            srclen -= ssize;
        }

        /* High nibble: the next element's digit (uint8_t truncation of
           the shift keeps only its low nibble).  NOTE(review): elements
           are consumed only while srclen > ssize, so the element at the
           very start of the source is never read -- confirm against the
           PoP definition of PKA/PKU and the srclen the translator
           passes.  */
        if (srclen > ssize) {
            b |= cpu_ldub_data_ra(env, src, ra) << 4;
            src -= ssize;
            srclen -= ssize;
        }

        cpu_stb_data_ra(env, dest, b, ra);
        dest--;
    }
}
1073
1074
/* PACK ASCII: one-byte source elements.  GETPC() must be evaluated
   here, in the outermost helper, to identify the calling TB.  */
void HELPER(pka)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 1, GETPC());
}
1080
/* PACK UNICODE: two-byte source elements.  */
void HELPER(pku)(CPUS390XState *env, uint64_t dest, uint64_t src,
                 uint32_t srclen)
{
    do_pkau(env, dest, src, srclen, 2, GETPC());
}
1086
/* UNPK: convert packed decimal back to zoned decimal.  len carries both
   4-bit length codes: len >> 4 is the destination length, len & 0xf the
   source length.  */
void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
                  uint64_t src)
{
    uintptr_t ra = GETPC();
    int len_dest = len >> 4;
    int len_src = len & 0xf;
    uint8_t b;
    /* Nonzero when the next digit comes from the high nibble of the
       currently loaded source byte.  */
    int second_nibble = 0;

    /* The operands are processed right to left.  */
    dest += len_dest;
    src += len_src;

    /* last byte is special, it only flips the nibbles */
    b = cpu_ldub_data_ra(env, src, ra);
    cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
    src--;
    len_src--;

    /* now pad every nibble with 0xf0 */

    while (len_dest > 0) {
        uint8_t cur_byte = 0;

        /* Once the source is exhausted, remaining digits are zero.  */
        if (len_src > 0) {
            cur_byte = cpu_ldub_data_ra(env, src, ra);
        }

        len_dest--;
        dest--;

        /* only advance one nibble at a time */
        if (second_nibble) {
            cur_byte >>= 4;
            len_src--;
            src--;
        }
        second_nibble = !second_nibble;

        /* digit */
        cur_byte = (cur_byte & 0xf);
        /* zone bits */
        cur_byte |= 0xf0;

        cpu_stb_data_ra(env, dest, cur_byte, ra);
    }
}
1133
/* Common body for UNPKA (dsize = 1) and UNPKU (dsize = 2): unpack a
   16-byte packed-decimal source into destlen bytes of zoned (0x3x)
   characters, right to left, returning the condition code derived
   from the sign nibble.  */
static inline uint32_t do_unpkau(CPUS390XState *env, uint64_t dest,
                                 uint32_t destlen, int dsize, uint64_t src,
                                 uintptr_t ra)
{
    int i;
    uint32_t cc;
    uint8_t b;
    /* The source operand is always 16 bytes long.  */
    const int srclen = 16;

    /* The operands are processed from right to left.  */
    src += srclen - 1;
    dest += destlen - dsize;

    /* Check for the sign.  */
    b = cpu_ldub_data_ra(env, src, ra);
    src--;
    switch (b & 0xf) {
    case 0xa:
    case 0xc:
    case 0xe ... 0xf:
        cc = 0;  /* plus */
        break;
    case 0xb:
    case 0xd:
        cc = 1;  /* minus */
        break;
    default:
    case 0x0 ... 0x9:
        cc = 3;  /* invalid */
        break;
    }

    /* Now pad every nibble with 0x30, advancing one nibble at a time. */
    for (i = 0; i < destlen; i += dsize) {
        if (i == (31 * dsize)) {
            /* If length is 32/64 bytes, the leftmost byte is 0. */
            b = 0;
        } else if (i % (2 * dsize)) {
            /* Odd digit position: fetch the next source byte.  */
            b = cpu_ldub_data_ra(env, src, ra);
            src--;
        } else {
            /* Even position: use the high nibble of the current byte
               (for i == 0 that is the digit sharing the sign byte).  */
            b >>= 4;
        }
        cpu_stsize_data_ra(env, dest, 0x30 + (b & 0xf), dsize, ra);
        dest -= dsize;
    }

    return cc;
}
1184
/* UNPACK ASCII: one-byte destination characters.  */
uint32_t HELPER(unpka)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 1, src, GETPC());
}
1190
/* UNPACK UNICODE: two-byte destination characters.  */
uint32_t HELPER(unpku)(CPUS390XState *env, uint64_t dest, uint32_t destlen,
                       uint64_t src)
{
    return do_unpkau(env, dest, destlen, 2, src, GETPC());
}
1196
1197uint32_t HELPER(tp)(CPUS390XState *env, uint64_t dest, uint32_t destlen)
1198{
1199    uintptr_t ra = GETPC();
1200    uint32_t cc = 0;
1201    int i;
1202
1203    for (i = 0; i < destlen; i++) {
1204        uint8_t b = cpu_ldub_data_ra(env, dest + i, ra);
1205        /* digit */
1206        cc |= (b & 0xf0) > 0x90 ? 2 : 0;
1207
1208        if (i == (destlen - 1)) {
1209            /* sign */
1210            cc |= (b & 0xf) < 0xa ? 1 : 0;
1211        } else {
1212            /* digit */
1213            cc |= (b & 0xf) > 0x9 ? 2 : 0;
1214        }
1215    }
1216
1217    return cc;
1218}
1219
/* Translate len + 1 bytes of the array in place through the 256-byte
   translation table at trans.  */
static uint32_t do_helper_tr(CPUS390XState *env, uint32_t len, uint64_t array,
                             uint64_t trans, uintptr_t ra)
{
    uint32_t i;

    for (i = 0; i <= len; i++) {
        uint8_t byte = cpu_ldub_data_ra(env, array + i, ra);
        uint8_t new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    /* The condition code is not modified; hand the current value back
       for callers that want to pass it through.  */
    return env->cc_op;
}
1233
/* TRANSLATE: len is the L field, i.e. bytes to process minus one.  */
void HELPER(tr)(CPUS390XState *env, uint32_t len, uint64_t array,
                uint64_t trans)
{
    do_helper_tr(env, len, array, trans, GETPC());
}
1239
/* TRANSLATE EXTENDED: translate bytes of the first operand in place
   until the test character (low byte of R0) is met, the operand is
   exhausted, or the per-call work cap is hit.  Returns the updated
   operand address; the remaining length goes to env->retxl and the
   condition code to env->cc_op (0 done, 1 test char found, 3 more
   work remains).  */
uint64_t HELPER(tre)(CPUS390XState *env, uint64_t array,
                     uint64_t len, uint64_t trans)
{
    uintptr_t ra = GETPC();
    uint8_t end = env->regs[0] & 0xff;
    uint64_t l = len;
    uint64_t i;
    uint32_t cc = 0;

    /* Outside 64-bit mode, truncate the address to 31 bits and the
       length to 32 bits.  */
    if (!(env->psw.mask & PSW_MASK_64)) {
        array &= 0x7fffffff;
        l = (uint32_t)l;
    }

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 8k.  */
    if (l > 0x2000) {
        l = 0x2000;
        cc = 3;
    }

    for (i = 0; i < l; i++) {
        uint8_t byte, new_byte;

        byte = cpu_ldub_data_ra(env, array + i, ra);

        /* Stop, without translating, at the test character.  */
        if (byte == end) {
            cc = 1;
            break;
        }

        new_byte = cpu_ldub_data_ra(env, trans + byte, ra);
        cpu_stb_data_ra(env, array + i, new_byte, ra);
    }

    env->cc_op = cc;
    env->retxl = len - i;
    return array + i;
}
1279
1280static inline uint32_t do_helper_trt(CPUS390XState *env, int len,
1281                                     uint64_t array, uint64_t trans,
1282                                     int inc, uintptr_t ra)
1283{
1284    int i;
1285
1286    for (i = 0; i <= len; i++) {
1287        uint8_t byte = cpu_ldub_data_ra(env, array + i * inc, ra);
1288        uint8_t sbyte = cpu_ldub_data_ra(env, trans + byte, ra);
1289
1290        if (sbyte != 0) {
1291            set_address(env, 1, array + i * inc);
1292            env->regs[2] = deposit64(env->regs[2], 0, 8, sbyte);
1293            return (i == len) ? 2 : 1;
1294        }
1295    }
1296
1297    return 0;
1298}
1299
/* TRANSLATE AND TEST: forward scan.  */
uint32_t HELPER(trt)(CPUS390XState *env, uint32_t len, uint64_t array,
                     uint64_t trans)
{
    return do_helper_trt(env, len, array, trans, 1, GETPC());
}
1305
/* TRANSLATE AND TEST REVERSE: backward scan.  */
uint32_t HELPER(trtr)(CPUS390XState *env, uint32_t len, uint64_t array,
                      uint64_t trans)
{
    return do_helper_trt(env, len, array, trans, -1, GETPC());
}
1311
/* Translate one/two to one/two */
uint32_t HELPER(trXX)(CPUS390XState *env, uint32_t r1, uint32_t r2,
                      uint32_t tst, uint32_t sizes)
{
    uintptr_t ra = GETPC();
    /* sizes bit 0 selects a 1-byte (else 2-byte) destination element,
       bit 1 a 1-byte (else 2-byte) source element; the combinations
       cover TROO, TROT, TRTO and TRTT.  */
    int dsize = (sizes & 1) ? 1 : 2;
    int ssize = (sizes & 2) ? 1 : 2;
    uint64_t tbl = get_address(env, 1);
    uint64_t dst = get_address(env, r1);
    uint64_t len = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint32_t cc = 3;
    int i;

    /* The lower address bits of TBL are ignored.  For TROO, TROT, it's
       the low 3 bits (double-word aligned).  For TRTO, TRTT, it's either
       the low 12 bits (4K, without ETF2-ENH) or 3 bits (with ETF2-ENH).  */
    if (ssize == 2 && !s390_has_feat(S390_FEAT_ETF2_ENH)) {
        tbl &= -4096;
    } else {
        tbl &= -8;
    }

    /* The operand length must be a multiple of the source element size.  */
    check_alignment(env, len, ssize, ra);

    /* Lest we fail to service interrupts in a timely manner, */
    /* limit the amount of work we're willing to do.   */
    for (i = 0; i < 0x2000; i++) {
        /* Translate one element; stop with cc 1 on the test character.  */
        uint16_t sval = cpu_ldusize_data_ra(env, src, ssize, ra);
        uint64_t tble = tbl + (sval * dsize);
        uint16_t dval = cpu_ldusize_data_ra(env, tble, dsize, ra);
        if (dval == tst) {
            cc = 1;
            break;
        }
        cpu_stsize_data_ra(env, dst, dval, dsize, ra);

        len -= ssize;
        src += ssize;
        dst += dsize;

        if (len == 0) {
            cc = 0;
            break;
        }
    }

    /* Update the operand registers; cc stays 3 if the cap was hit.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, len);
    set_address(env, r2, src);

    return cc;
}
1365
/* COMPARE DOUBLE AND SWAP (CDSG): 128-bit compare-and-swap of the
   even/odd register pair r1 against memory at addr, storing pair r3 on
   a match.  The old memory value is always left in r1/r1+1 and cc_op
   is 0 on match, 1 on mismatch.  */
void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
                  uint32_t r1, uint32_t r3)
{
    uintptr_t ra = GETPC();
    Int128 cmpv = int128_make128(env->regs[r1 + 1], env->regs[r1]);
    Int128 newv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
    Int128 oldv;
    bool fail;

    if (parallel_cpus) {
#ifndef CONFIG_ATOMIC128
        /* No host 128-bit cmpxchg: restart this insn serially.  */
        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
#else
        int mem_idx = cpu_mmu_index(env, false);
        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
        oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);
        fail = !int128_eq(oldv, cmpv);
#endif
    } else {
        uint64_t oldh, oldl;

        /* Serial context: plain loads/stores suffice, but the
           architectural 16-byte alignment is still enforced.  */
        check_alignment(env, addr, 16, ra);

        oldh = cpu_ldq_data_ra(env, addr + 0, ra);
        oldl = cpu_ldq_data_ra(env, addr + 8, ra);

        oldv = int128_make128(oldl, oldh);
        fail = !int128_eq(oldv, cmpv);
        if (fail) {
            /* On mismatch, write the old value back unchanged.  */
            newv = oldv;
        }

        cpu_stq_data_ra(env, addr + 0, int128_gethi(newv), ra);
        cpu_stq_data_ra(env, addr + 8, int128_getlo(newv), ra);
    }

    env->cc_op = fail;
    env->regs[r1] = int128_gethi(oldv);
    env->regs[r1 + 1] = int128_getlo(oldv);
}
1406
/* COMPARE AND SWAP AND STORE: perform a compare-and-swap of size
   4 << fc at a1 and, if it succeeded, a store of size 1 << sc at a2.
   fc and sc come from GR0; the swap and store values come from the
   16-byte-aligned parameter list addressed by GR1.  Returns the cc
   (0 swap succeeded, 1 comparison failed).  */
uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t a2)
{
#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128)
    uint32_t mem_idx = cpu_mmu_index(env, false);
#endif
    uintptr_t ra = GETPC();
    uint32_t fc = extract32(env->regs[0], 0, 8);
    uint32_t sc = extract32(env->regs[0], 8, 8);
    uint64_t pl = get_address(env, 1) & -16;
    uint64_t svh, svl;
    uint32_t cc;

    /* Sanity check the function code and storage characteristic.  */
    if (fc > 1 || sc > 3) {
        if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
            goto spec_exception;
        }
        /* With the facility: fc 2 (128-bit) needs an even r3 pair.  */
        if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
            goto spec_exception;
        }
    }

    /* Sanity check the alignments.  */
    if (extract32(a1, 0, 4 << fc) || extract32(a2, 0, 1 << sc)) {
        goto spec_exception;
    }

    /* Sanity check writability of the store address.  */
#ifndef CONFIG_USER_ONLY
    probe_write(env, a2, mem_idx, ra);
#endif

    /* Note that the compare-and-swap is atomic, and the store is atomic, but
       the complete operation is not.  Therefore we do not need to assert serial
       context in order to implement this.  That said, restart early if we can't
       support either operation that is supposed to be atomic.  */
    if (parallel_cpus) {
        int mask = 0;
#if !defined(CONFIG_ATOMIC64)
        mask = -8;
#elif !defined(CONFIG_ATOMIC128)
        mask = -16;
#endif
        if (((4 << fc) | (1 << sc)) & mask) {
            cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
        }
    }

    /* All loads happen before all stores.  For simplicity, load the entire
       store value area from the parameter list.  */
    svh = cpu_ldq_data_ra(env, pl + 16, ra);
    svl = cpu_ldq_data_ra(env, pl + 24, ra);

    switch (fc) {
    /* fc 0: 32-bit compare and swap.  */
    case 0:
        {
            uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
            uint32_t cv = env->regs[r3];
            uint32_t ov;

            if (parallel_cpus) {
#ifdef CONFIG_USER_ONLY
                uint32_t *haddr = g2h(a1);
                ov = atomic_cmpxchg__nocheck(haddr, cv, nv);
#else
                TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
                ov = helper_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
#endif
            } else {
                ov = cpu_ldl_data_ra(env, a1, ra);
                cpu_stl_data_ra(env, a1, (ov == cv ? nv : ov), ra);
            }
            cc = (ov != cv);
            env->regs[r3] = deposit64(env->regs[r3], 32, 32, ov);
        }
        break;

    /* fc 1: 64-bit compare and swap.  */
    case 1:
        {
            uint64_t nv = cpu_ldq_data_ra(env, pl, ra);
            uint64_t cv = env->regs[r3];
            uint64_t ov;

            if (parallel_cpus) {
#ifdef CONFIG_ATOMIC64
# ifdef CONFIG_USER_ONLY
                uint64_t *haddr = g2h(a1);
                ov = atomic_cmpxchg__nocheck(haddr, cv, nv);
# else
                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN, mem_idx);
                ov = helper_atomic_cmpxchgq_be_mmu(env, a1, cv, nv, oi, ra);
# endif
#else
                /* Note that we asserted !parallel_cpus above.  */
                g_assert_not_reached();
#endif
            } else {
                ov = cpu_ldq_data_ra(env, a1, ra);
                cpu_stq_data_ra(env, a1, (ov == cv ? nv : ov), ra);
            }
            cc = (ov != cv);
            env->regs[r3] = ov;
        }
        break;

    /* fc 2: 128-bit compare and swap on the r3 register pair.  */
    case 2:
        {
            uint64_t nvh = cpu_ldq_data_ra(env, pl, ra);
            uint64_t nvl = cpu_ldq_data_ra(env, pl + 8, ra);
            Int128 nv = int128_make128(nvl, nvh);
            Int128 cv = int128_make128(env->regs[r3 + 1], env->regs[r3]);
            Int128 ov;

            if (parallel_cpus) {
#ifdef CONFIG_ATOMIC128
                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
                ov = helper_atomic_cmpxchgo_be_mmu(env, a1, cv, nv, oi, ra);
                cc = !int128_eq(ov, cv);
#else
                /* Note that we asserted !parallel_cpus above.  */
                g_assert_not_reached();
#endif
            } else {
                uint64_t oh = cpu_ldq_data_ra(env, a1 + 0, ra);
                uint64_t ol = cpu_ldq_data_ra(env, a1 + 8, ra);

                ov = int128_make128(ol, oh);
                cc = !int128_eq(ov, cv);
                if (cc) {
                    nv = ov;
                }

                cpu_stq_data_ra(env, a1 + 0, int128_gethi(nv), ra);
                cpu_stq_data_ra(env, a1 + 8, int128_getlo(nv), ra);
            }

            env->regs[r3 + 0] = int128_gethi(ov);
            env->regs[r3 + 1] = int128_getlo(ov);
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Store only if the comparison succeeded.  Note that above we use a pair
       of 64-bit big-endian loads, so for sc < 3 we must extract the value
       from the most-significant bits of svh.  */
    if (cc == 0) {
        switch (sc) {
        case 0:
            cpu_stb_data_ra(env, a2, svh >> 56, ra);
            break;
        case 1:
            cpu_stw_data_ra(env, a2, svh >> 48, ra);
            break;
        case 2:
            cpu_stl_data_ra(env, a2, svh >> 32, ra);
            break;
        case 3:
            cpu_stq_data_ra(env, a2, svh, ra);
            break;
        case 4:
            if (parallel_cpus) {
#ifdef CONFIG_ATOMIC128
                TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
                Int128 sv = int128_make128(svl, svh);
                helper_atomic_sto_be_mmu(env, a2, sv, oi, ra);
#else
                /* Note that we asserted !parallel_cpus above.  */
                g_assert_not_reached();
#endif
            } else {
                cpu_stq_data_ra(env, a2 + 0, svh, ra);
                cpu_stq_data_ra(env, a2 + 8, svl, ra);
            }
            break;
        default:
            g_assert_not_reached();
        }
    }

    return cc;

 spec_exception:
    cpu_restore_state(ENV_GET_CPU(env), ra);
    program_interrupt(env, PGM_SPECIFICATION, 6);
    g_assert_not_reached();
}
1596
1597#if !defined(CONFIG_USER_ONLY)
/* LOAD CONTROL (64-bit): load control registers r1 through r3, with
   wraparound modulo 16, from consecutive doublewords at a2.  */
void HELPER(lctlg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    S390CPU *cpu = s390_env_get_cpu(env);
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    for (i = r1;; i = (i + 1) % 16) {
        uint64_t val = cpu_ldq_data_ra(env, src, ra);
        /* CR9-CR11 carry PER controls; note if any of them change.  */
        if (env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        env->cregs[i] = val;
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%" PRIx64 "\n",
                   i, src, val);
        src += sizeof(uint64_t);

        if (i == r3) {
            break;
        }
    }

    /* Refresh PER watchpoints if PER is active and its controls moved.  */
    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(CPU(cpu));
    }

    /* Control registers influence address translation: drop the TLB.  */
    tlb_flush(CPU(cpu));
}
1627
/* LOAD CONTROL (32-bit): load the low words of control registers r1
   through r3, with wraparound modulo 16, from consecutive words at a2.
   The high halves of the control registers are preserved.  */
void HELPER(lctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    S390CPU *cpu = s390_env_get_cpu(env);
    bool PERchanged = false;
    uint64_t src = a2;
    uint32_t i;

    for (i = r1;; i = (i + 1) % 16) {
        uint32_t val = cpu_ldl_data_ra(env, src, ra);
        /* CR9-CR11 carry PER controls; note if any of them change.  */
        if ((uint32_t)env->cregs[i] != val && i >= 9 && i <= 11) {
            PERchanged = true;
        }
        env->cregs[i] = deposit64(env->cregs[i], 0, 32, val);
        HELPER_LOG("load ctl %d from 0x%" PRIx64 " == 0x%x\n", i, src, val);
        src += sizeof(uint32_t);

        if (i == r3) {
            break;
        }
    }

    /* Refresh PER watchpoints if PER is active and its controls moved.  */
    if (PERchanged && env->psw.mask & PSW_MASK_PER) {
        s390_cpu_recompute_watchpoints(CPU(cpu));
    }

    /* Control registers influence address translation: drop the TLB.  */
    tlb_flush(CPU(cpu));
}
1656
/* STORE CONTROL (64-bit): store control registers r1 through r3, with
   wraparound modulo 16, as consecutive doublewords at a2.  */
void HELPER(stctg)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t dest = a2;
    uint32_t i;

    for (i = r1;; i = (i + 1) % 16) {
        cpu_stq_data_ra(env, dest, env->cregs[i], ra);
        dest += sizeof(uint64_t);

        if (i == r3) {
            break;
        }
    }
}
1672
/* STORE CONTROL (32-bit): store the low words of control registers r1
   through r3, with wraparound modulo 16, as consecutive words at a2.  */
void HELPER(stctl)(CPUS390XState *env, uint32_t r1, uint64_t a2, uint32_t r3)
{
    uintptr_t ra = GETPC();
    uint64_t dest = a2;
    uint32_t i;

    for (i = r1;; i = (i + 1) % 16) {
        cpu_stl_data_ra(env, dest, env->cregs[i], ra);
        dest += sizeof(uint32_t);

        if (i == r3) {
            break;
        }
    }
}
1688
/* TEST BLOCK: check that the 4K page at the given real address is
   usable and clear it to zeros.  Returns 0 (cc 0) on success; raises
   an addressing or protection exception otherwise.  */
uint32_t HELPER(testblock)(CPUS390XState *env, uint64_t real_addr)
{
    uintptr_t ra = GETPC();
    CPUState *cs = CPU(s390_env_get_cpu(env));
    uint64_t abs_addr;
    int i;

    /* Convert the real address to an absolute page address.  */
    real_addr = wrap_address(env, real_addr);
    abs_addr = mmu_real2abs(env, real_addr) & TARGET_PAGE_MASK;
    if (!address_space_access_valid(&address_space_memory, abs_addr,
                                    TARGET_PAGE_SIZE, true)) {
        cpu_restore_state(cs, ra);
        program_interrupt(env, PGM_ADDRESSING, 4);
        return 1;
    }

    /* Check low-address protection */
    if ((env->cregs[0] & CR0_LOWPROT) && real_addr < 0x2000) {
        cpu_restore_state(cs, ra);
        program_interrupt(env, PGM_PROTECTION, 4);
        return 1;
    }

    /* Zero the whole page, a doubleword at a time.  */
    for (i = 0; i < TARGET_PAGE_SIZE; i += 8) {
        stq_phys(cs->as, abs_addr + i, 0);
    }

    return 0;
}
1718
/* TEST PROTECTION.  */
uint32_t HELPER(tprot)(uint64_t a1, uint64_t a2)
{
    /* XXX implement */
    /* NOTE(review): stub -- always reports cc 0 (fetch and store
       permitted) regardless of the actual key/protection state.  */
    return 0;
}
1724
/* insert storage key extended */
uint64_t HELPER(iske)(CPUS390XState *env, uint64_t r2)
{
    /* Cache the storage-keys device lookup across calls.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;

    /* Addresses beyond RAM have no storage key; report 0.
       NOTE(review): the check uses '>', so addr == ram_size slips
       through -- confirm whether this should be '>='.  */
    if (addr > ram_size) {
        return 0;
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
    }

    /* get_skeys returns nonzero on error; report key 0 in that case.  */
    if (skeyclass->get_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key)) {
        return 0;
    }
    return key;
}
1747
/* set storage key extended */
void HELPER(sske)(CPUS390XState *env, uint64_t r1, uint64_t r2)
{
    /* Cache the storage-keys device lookup across calls.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint64_t addr = wrap_address(env, r2);
    uint8_t key;

    /* Addresses beyond RAM have no storage key; silently ignore.
       NOTE(review): '>' permits addr == ram_size -- confirm whether
       this should be '>='.  */
    if (addr > ram_size) {
        return;
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
    }

    /* The new key value is the low byte of r1; errors are ignored.  */
    key = (uint8_t) r1;
    skeyclass->set_skeys(ss, addr / TARGET_PAGE_SIZE, 1, &key);
}
1768
/* reset reference bit extended */
uint32_t HELPER(rrbe)(CPUS390XState *env, uint64_t r2)
{
    /* Cache the storage-keys device lookup across calls.  */
    static S390SKeysState *ss;
    static S390SKeysClass *skeyclass;
    uint8_t re, key;

    /* Addresses beyond RAM have no storage key; report cc 0.  */
    if (r2 > ram_size) {
        return 0;
    }

    if (unlikely(!ss)) {
        ss = s390_get_skeys_device();
        skeyclass = S390_SKEYS_GET_CLASS(ss);
    }

    if (skeyclass->get_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
        return 0;
    }

    /* Remember the old R and C bits, then clear only R.  */
    re = key & (SK_R | SK_C);
    key &= ~SK_R;

    if (skeyclass->set_skeys(ss, r2 / TARGET_PAGE_SIZE, 1, &key)) {
        return 0;
    }

    /*
     * cc
     *
     * 0  Reference bit zero; change bit zero
     * 1  Reference bit zero; change bit one
     * 2  Reference bit one; change bit zero
     * 3  Reference bit one; change bit one
     */

    return re >> 1;
}
1807
/* MOVE TO SECONDARY: copy up to 256 bytes from the primary address
   space (a2) to the secondary address space (a1).  Returns cc 3 when
   the requested length was truncated to 256, else cc 0.  */
uint32_t HELPER(mvcs)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
{
    uintptr_t ra = GETPC();
    int cc = 0, i;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    }

    /* XXX replace w/ memcpy */
    for (i = 0; i < l; i++) {
        uint8_t x = cpu_ldub_primary_ra(env, a2 + i, ra);
        cpu_stb_secondary_ra(env, a1 + i, x, ra);
    }

    return cc;
}
1830
/* MOVE TO PRIMARY: copy up to 256 bytes from the secondary address
   space (a2) to the primary address space (a1).  Returns cc 3 when
   the requested length was truncated to 256, else cc 0.  */
uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, uint64_t a1, uint64_t a2)
{
    uintptr_t ra = GETPC();
    int cc = 0, i;

    HELPER_LOG("%s: %16" PRIx64 " %16" PRIx64 " %16" PRIx64 "\n",
               __func__, l, a1, a2);

    if (l > 256) {
        /* max 256 */
        l = 256;
        cc = 3;
    }

    /* XXX replace w/ memcpy */
    for (i = 0; i < l; i++) {
        uint8_t x = cpu_ldub_secondary_ra(env, a2 + i, ra);
        cpu_stb_primary_ra(env, a1 + i, x, ra);
    }

    return cc;
}
1853
/* INVALIDATE DAT TABLE ENTRY: mark region/segment table entries
   designated by the ASCE in r1 and the index/count in r2 as invalid,
   then flush the TLB (locally when m4 bit 0 is set, else on all CPUs).  */
void HELPER(idte)(CPUS390XState *env, uint64_t r1, uint64_t r2, uint32_t m4)
{
    CPUState *cs = CPU(s390_env_get_cpu(env));
    const uintptr_t ra = GETPC();
    uint64_t table, entry, raddr;
    uint16_t entries, i, index = 0;

    /* These r2 bits must be zero; otherwise raise a specification
       exception.  */
    if (r2 & 0xff000) {
        cpu_restore_state(cs, ra);
        program_interrupt(env, PGM_SPECIFICATION, 4);
    }

    if (!(r2 & 0x800)) {
        /* invalidation-and-clearing operation */
        table = r1 & _ASCE_ORIGIN;
        entries = (r2 & 0x7ff) + 1;

        /* Select the table-index field matching the designation type.  */
        switch (r1 & _ASCE_TYPE_MASK) {
        case _ASCE_TYPE_REGION1:
            index = (r2 >> 53) & 0x7ff;
            break;
        case _ASCE_TYPE_REGION2:
            index = (r2 >> 42) & 0x7ff;
            break;
        case _ASCE_TYPE_REGION3:
            index = (r2 >> 31) & 0x7ff;
            break;
        case _ASCE_TYPE_SEGMENT:
            index = (r2 >> 20) & 0x7ff;
            break;
        }
        for (i = 0; i < entries; i++) {
            /* addresses are not wrapped in 24/31bit mode but table index is */
            raddr = table + ((index + i) & 0x7ff) * sizeof(entry);
            entry = ldq_phys(cs->as, raddr);
            if (!(entry & _REGION_ENTRY_INV)) {
                /* we are allowed to not store if already invalid */
                entry |= _REGION_ENTRY_INV;
                stq_phys(cs->as, raddr, entry);
            }
        }
    }

    /* We simply flush the complete tlb, therefore we can ignore r3. */
    if (m4 & 1) {
        tlb_flush(cs);
    } else {
        tlb_flush_all_cpus_synced(cs);
    }
}
1904
/* invalidate pte */
void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
                  uint32_t m4)
{
    CPUState *cs = CPU(s390_env_get_cpu(env));
    uint64_t page = vaddr & TARGET_PAGE_MASK;
    uint64_t pte_addr, pte;

    /* Compute the page table entry address */
    pte_addr = (pto & _SEGMENT_ENTRY_ORIGIN);
    pte_addr += (vaddr & VADDR_PX) >> 9;

    /* Mark the page table entry as invalid */
    pte = ldq_phys(cs->as, pte_addr);
    pte |= _PAGE_INVALID;
    stq_phys(cs->as, pte_addr, pte);

    /* XXX we exploit the fact that Linux passes the exact virtual
       address here - it's not obliged to! */
    /* m4 bit 0 set: local-clearing form, flush only this CPU.  */
    if (m4 & 1) {
        if (vaddr & ~VADDR_PX) {
            tlb_flush_page(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush(cs);
        }
    } else {
        if (vaddr & ~VADDR_PX) {
            tlb_flush_page_all_cpus_synced(cs, page);
            /* XXX 31-bit hack */
            tlb_flush_page_all_cpus_synced(cs, page ^ 0x80000000);
        } else {
            /* looks like we don't have a valid virtual address */
            tlb_flush_all_cpus_synced(cs);
        }
    }
}
1944
1945/* flush local tlb */
1946void HELPER(ptlb)(CPUS390XState *env)
1947{
1948    S390CPU *cpu = s390_env_get_cpu(env);
1949
1950    tlb_flush(CPU(cpu));
1951}
1952
1953/* flush global tlb */
1954void HELPER(purge)(CPUS390XState *env)
1955{
1956    S390CPU *cpu = s390_env_get_cpu(env);
1957
1958    tlb_flush_all_cpus_synced(CPU(cpu));
1959}
1960
/* load using real address */
uint64_t HELPER(lura)(CPUS390XState *env, uint64_t addr)
{
    CPUState *cs = CPU(s390_env_get_cpu(env));

    /* 32-bit load from the wrapped real address, zero-extended.  */
    return (uint32_t)ldl_phys(cs->as, wrap_address(env, addr));
}
1968
/* 64-bit variant: load a doubleword using the real address.  */
uint64_t HELPER(lurag)(CPUS390XState *env, uint64_t addr)
{
    CPUState *cs = CPU(s390_env_get_cpu(env));

    return ldq_phys(cs->as, wrap_address(env, addr));
}
1975
/* store using real address */
void HELPER(stura)(CPUS390XState *env, uint64_t addr, uint64_t v1)
{
    CPUState *cs = CPU(s390_env_get_cpu(env));

    /* 32-bit store of the low word of v1 at the wrapped real address.  */
    stl_phys(cs->as, wrap_address(env, addr), (uint32_t)v1);

    /* Record a PER storage-alteration (real) event when enabled.  */
    if ((env->psw.mask & PSW_MASK_PER) &&
        (env->cregs[9] & PER_CR9_EVENT_STORE) &&
        (env->cregs[9] & PER_CR9_EVENT_STORE_REAL)) {
        /* PSW is saved just before calling the helper.  */
        env->per_address = env->psw.addr;
        env->per_perc_atmid = PER_CODE_EVENT_STORE_REAL | get_per_atmid(env);
    }
}
1991
/* 64-bit variant: store a doubleword using the real address.  */
void HELPER(sturg)(CPUS390XState *env, uint64_t addr, uint64_t v1)
{
    CPUState *cs = CPU(s390_env_get_cpu(env));

    stq_phys(cs->as, wrap_address(env, addr), v1);

    /* Record a PER storage-alteration (real) event when enabled.  */
    if ((env->psw.mask & PSW_MASK_PER) &&
        (env->cregs[9] & PER_CR9_EVENT_STORE) &&
        (env->cregs[9] & PER_CR9_EVENT_STORE_REAL)) {
        /* PSW is saved just before calling the helper.  */
        env->per_address = env->psw.addr;
        env->per_perc_atmid = PER_CODE_EVENT_STORE_REAL | get_per_atmid(env);
    }
}
2006
/* load real address */
uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
{
    CPUState *cs = CPU(s390_env_get_cpu(env));
    uint32_t cc = 0;
    uint64_t asc = env->psw.mask & PSW_MASK_ASC;
    uint64_t ret;
    int old_exc, flags;

    /* XXX incomplete - has more corner cases */
    /* In 24/31-bit addressing mode, a second-operand address with bits
       above position 32 is a special-operation exception.  */
    if (!(env->psw.mask & PSW_MASK_64) && (addr >> 32)) {
        cpu_restore_state(cs, GETPC());
        program_interrupt(env, PGM_SPECIAL_OP, 2);
    }

    /* mmu_translate signals failure by raising EXCP_PGM on the CPU;
       LRA must not deliver that interrupt, so save and restore
       exception_index around the translation.  */
    old_exc = cs->exception_index;
    if (mmu_translate(env, addr, 0, asc, &ret, &flags, true)) {
        cc = 3;
    }
    if (cs->exception_index == EXCP_PGM) {
        /* Translation failed: return the program-interruption code
           with bit 32 set instead of a real address.  */
        ret = env->int_pgm_code | 0x80000000;
    } else {
        /* Translation succeeded: combine the page frame with the
           byte offset within the page.
           NOTE(review): if mmu_translate returned nonzero without
           raising EXCP_PGM, ret would be used uninitialized here --
           confirm mmu_translate always does one or the other.  */
        ret |= addr & ~TARGET_PAGE_MASK;
    }
    cs->exception_index = old_exc;

    env->cc_op = cc;
    return ret;
}
2036#endif
2037
/* load pair from quadword */
uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
{
    uintptr_t ra = GETPC();
    uint64_t hi, lo;

    if (parallel_cpus) {
        /* Multi-threaded TCG: the 16-byte load must be a single atomic
           access.  */
#ifndef CONFIG_ATOMIC128
        /* The host has no 128-bit atomics: restart the instruction in
           the exclusive (serialized) execution mode.  */
        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
#else
        int mem_idx = cpu_mmu_index(env, false);
        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
        Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
        hi = int128_gethi(v);
        lo = int128_getlo(v);
#endif
    } else {
        /* Single-threaded: two 8-byte loads suffice, but the
           architectural 16-byte alignment must still be enforced.  */
        check_alignment(env, addr, 16, ra);

        hi = cpu_ldq_data_ra(env, addr + 0, ra);
        lo = cpu_ldq_data_ra(env, addr + 8, ra);
    }

    /* The second register of the pair is passed back via retxl; the
       first is the return value.  */
    env->retxl = lo;
    return hi;
}
2064
/* store pair to quadword */
void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
                  uint64_t low, uint64_t high)
{
    uintptr_t ra = GETPC();

    if (parallel_cpus) {
        /* Multi-threaded TCG: the 16-byte store must be a single atomic
           access.  */
#ifndef CONFIG_ATOMIC128
        /* The host has no 128-bit atomics: restart the instruction in
           the exclusive (serialized) execution mode.  */
        cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
#else
        int mem_idx = cpu_mmu_index(env, false);
        TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);

        Int128 v = int128_make128(low, high);
        helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
#endif
    } else {
        /* Single-threaded: two 8-byte stores, HIGH at the lower
           address, after enforcing 16-byte alignment.  */
        check_alignment(env, addr, 16, ra);

        cpu_stq_data_ra(env, addr + 0, high, ra);
        cpu_stq_data_ra(env, addr + 8, low, ra);
    }
}
2088
/* Execute instruction.  This instruction executes an insn modified with
   the contents of r1.  It does not change the executed instruction in memory;
   it does not change the program counter.

   Perform this by recording the modified instruction in env->ex_value.
   This will be noticed by cpu_get_tb_cpu_state and thus tb translation.

   ILEN is the length of the EXECUTE instruction itself (used to advance
   the PSW on the fast paths), R1 supplies the modification byte, and
   ADDR is the address of the target instruction.
*/
void HELPER(ex)(CPUS390XState *env, uint32_t ilen, uint64_t r1, uint64_t addr)
{
    uint64_t insn = cpu_lduw_code(env, addr);
    uint8_t opc = insn >> 8;

    /* Or in the contents of R1[56:63].  */
    insn |= r1 & 0xff;

    /* Load the rest of the instruction.  Position the first halfword at
       the top of the 64-bit value; a 6-byte insn then occupies bits
       16..63, leaving the low bits free for ilen below.  */
    insn <<= 48;
    switch (get_ilen(opc)) {
    case 2:
        break;
    case 4:
        insn |= (uint64_t)cpu_lduw_code(env, addr + 2) << 32;
        break;
    case 6:
        insn |= (uint64_t)(uint32_t)cpu_ldl_code(env, addr + 2) << 16;
        break;
    default:
        g_assert_not_reached();
    }

    /* The very most common cases can be sped up by avoiding a new TB.  */
    if ((opc & 0xf0) == 0xd0) {
        /* 0xd0..0xdf are SS-format storage-to-storage instructions;
           dispatch the implemented ones directly to their helpers.  */
        typedef uint32_t (*dx_helper)(CPUS390XState *, uint32_t, uint64_t,
                                      uint64_t, uintptr_t);
        static const dx_helper dx[16] = {
            [0x2] = do_helper_mvc,
            [0x4] = do_helper_nc,
            [0x5] = do_helper_clc,
            [0x6] = do_helper_oc,
            [0x7] = do_helper_xc,
            [0xc] = do_helper_tr,
        };
        dx_helper helper = dx[opc & 0xf];

        if (helper) {
            /* Crack the SS-format fields: length, base+displacement
               for each operand.  */
            uint32_t l = extract64(insn, 48, 8);
            uint32_t b1 = extract64(insn, 44, 4);
            uint32_t d1 = extract64(insn, 32, 12);
            uint32_t b2 = extract64(insn, 28, 4);
            uint32_t d2 = extract64(insn, 16, 12);
            uint64_t a1 = wrap_address(env, env->regs[b1] + d1);
            uint64_t a2 = wrap_address(env, env->regs[b2] + d2);

            env->cc_op = helper(env, l, a1, a2, 0);
            env->psw.addr += ilen;
            return;
        }
    } else if (opc == 0x0a) {
        /* SUPERVISOR CALL: deliver the exception directly.  */
        env->int_svc_code = extract64(insn, 48, 8);
        env->int_svc_ilen = ilen;
        helper_exception(env, EXCP_SVC);
        g_assert_not_reached();
    }

    /* Record the insn we want to execute as well as the ilen to use
       during the execution of the target insn.  This will also ensure
       that ex_value is non-zero, which flags that we are in a state
       that requires such execution.  */
    env->ex_value = insn | ilen;
}
2159
/* MOVE WITH OPTIONAL SPECIFICATIONS: move up to 4096 bytes, with the
   access key and address-space control for each operand supplied in r0
   or defaulted from the PSW.  Returns CC 0 on completion, or CC 3 when
   LEN was truncated to 4096 (more remains to be moved).  */
uint32_t HELPER(mvcos)(CPUS390XState *env, uint64_t dest, uint64_t src,
                       uint64_t len)
{
    const uint8_t psw_key = (env->psw.mask & PSW_MASK_KEY) >> PSW_SHIFT_KEY;
    const uint8_t psw_as = (env->psw.mask & PSW_MASK_ASC) >> PSW_SHIFT_ASC;
    const uint64_t r0 = env->regs[0];
    const uintptr_t ra = GETPC();
    CPUState *cs = CPU(s390_env_get_cpu(env));
    uint8_t dest_key, dest_as, dest_k, dest_a;
    uint8_t src_key, src_as, src_k, src_a;
    uint64_t val;
    int cc = 0;

    HELPER_LOG("%s dest %" PRIx64 ", src %" PRIx64 ", len %" PRIx64 "\n",
               __func__, dest, src, len);

    /* The instruction is only valid with DAT on.  */
    if (!(env->psw.mask & PSW_MASK_DAT)) {
        cpu_restore_state(cs, ra);
        program_interrupt(env, PGM_SPECIAL_OP, 6);
    }

    /* OAC (operand access control) for the first operand -> dest */
    val = (r0 & 0xffff0000ULL) >> 16;
    dest_key = (val >> 12) & 0xf;
    dest_as = (val >> 6) & 0x3;
    dest_k = (val >> 1) & 0x1;
    dest_a = val & 0x1;

    /* OAC (operand access control) for the second operand -> src */
    val = (r0 & 0x0000ffffULL);
    src_key = (val >> 12) & 0xf;
    src_as = (val >> 6) & 0x3;
    src_k = (val >> 1) & 0x1;
    src_a = val & 0x1;

    /* A clear K (key) or A (address-space) control bit means the
       corresponding PSW value applies instead of the OAC field.  */
    if (!dest_k) {
        dest_key = psw_key;
    }
    if (!src_k) {
        src_key = psw_key;
    }
    if (!dest_a) {
        dest_as = psw_as;
    }
    if (!src_a) {
        src_as = psw_as;
    }

    /* Explicitly selecting the home space for the destination is not
       allowed in the problem state.  */
    if (dest_a && dest_as == AS_HOME && (env->psw.mask & PSW_MASK_PSTATE)) {
        cpu_restore_state(cs, ra);
        program_interrupt(env, PGM_SPECIAL_OP, 6);
    }
    /* Secondary-space references require the CR0 secondary-space
       control to be on.  */
    if (!(env->cregs[0] & CR0_SECONDARY) &&
        (dest_as == AS_SECONDARY || src_as == AS_SECONDARY)) {
        cpu_restore_state(cs, ra);
        program_interrupt(env, PGM_SPECIAL_OP, 6);
    }
    if (!psw_key_valid(env, dest_key) || !psw_key_valid(env, src_key)) {
        cpu_restore_state(cs, ra);
        program_interrupt(env, PGM_PRIVILEGED, 6);
    }

    /* At most 4096 bytes are moved per execution; CC 3 tells the
       program to iterate.  */
    len = wrap_length(env, len);
    if (len > 4096) {
        cc = 3;
        len = 4096;
    }

    /* FIXME: AR-mode and proper problem state mode (using PSW keys) missing */
    if (src_as == AS_ACCREG || dest_as == AS_ACCREG ||
        (env->psw.mask & PSW_MASK_PSTATE)) {
        qemu_log_mask(LOG_UNIMP, "%s: AR-mode and PSTATE support missing\n",
                      __func__);
        cpu_restore_state(cs, ra);
        program_interrupt(env, PGM_ADDRESSING, 6);
    }

    /* FIXME: a) LAP
     *        b) Access using correct keys
     *        c) AR-mode
     */
#ifdef CONFIG_USER_ONLY
    /* psw keys are never valid in user mode, we will never reach this */
    g_assert_not_reached();
#else
    fast_memmove_as(env, dest, src, len, dest_as, src_as, ra);
#endif

    return cc;
}
2250
/* Decode a Unicode character.  A return value < 0 indicates success, storing
   the UTF-32 result into OCHAR and the input length into OLEN.  A return
   value >= 0 indicates failure, and the CC value to be returned
   (0: source exhausted mid-character, 2: invalid character).  */
typedef int (*decode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, bool enh_check, uintptr_t ra,
                                 uint32_t *ochar, uint32_t *olen);

/* Encode a Unicode character.  A return value < 0 indicates success, storing
   the bytes into ADDR and the output length into OLEN.  A return value >= 0
   indicates failure, and the CC value to be returned
   (1: destination exhausted).  */
typedef int (*encode_unicode_fn)(CPUS390XState *env, uint64_t addr,
                                 uint64_t ilen, uintptr_t ra, uint32_t c,
                                 uint32_t *olen);
2264
2265static int decode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2266                       bool enh_check, uintptr_t ra,
2267                       uint32_t *ochar, uint32_t *olen)
2268{
2269    uint8_t s0, s1, s2, s3;
2270    uint32_t c, l;
2271
2272    if (ilen < 1) {
2273        return 0;
2274    }
2275    s0 = cpu_ldub_data_ra(env, addr, ra);
2276    if (s0 <= 0x7f) {
2277        /* one byte character */
2278        l = 1;
2279        c = s0;
2280    } else if (s0 <= (enh_check ? 0xc1 : 0xbf)) {
2281        /* invalid character */
2282        return 2;
2283    } else if (s0 <= 0xdf) {
2284        /* two byte character */
2285        l = 2;
2286        if (ilen < 2) {
2287            return 0;
2288        }
2289        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2290        c = s0 & 0x1f;
2291        c = (c << 6) | (s1 & 0x3f);
2292        if (enh_check && (s1 & 0xc0) != 0x80) {
2293            return 2;
2294        }
2295    } else if (s0 <= 0xef) {
2296        /* three byte character */
2297        l = 3;
2298        if (ilen < 3) {
2299            return 0;
2300        }
2301        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2302        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2303        c = s0 & 0x0f;
2304        c = (c << 6) | (s1 & 0x3f);
2305        c = (c << 6) | (s2 & 0x3f);
2306        /* Fold the byte-by-byte range descriptions in the PoO into
2307           tests against the complete value.  It disallows encodings
2308           that could be smaller, and the UTF-16 surrogates.  */
2309        if (enh_check
2310            && ((s1 & 0xc0) != 0x80
2311                || (s2 & 0xc0) != 0x80
2312                || c < 0x1000
2313                || (c >= 0xd800 && c <= 0xdfff))) {
2314            return 2;
2315        }
2316    } else if (s0 <= (enh_check ? 0xf4 : 0xf7)) {
2317        /* four byte character */
2318        l = 4;
2319        if (ilen < 4) {
2320            return 0;
2321        }
2322        s1 = cpu_ldub_data_ra(env, addr + 1, ra);
2323        s2 = cpu_ldub_data_ra(env, addr + 2, ra);
2324        s3 = cpu_ldub_data_ra(env, addr + 3, ra);
2325        c = s0 & 0x07;
2326        c = (c << 6) | (s1 & 0x3f);
2327        c = (c << 6) | (s2 & 0x3f);
2328        c = (c << 6) | (s3 & 0x3f);
2329        /* See above.  */
2330        if (enh_check
2331            && ((s1 & 0xc0) != 0x80
2332                || (s2 & 0xc0) != 0x80
2333                || (s3 & 0xc0) != 0x80
2334                || c < 0x010000
2335                || c > 0x10ffff)) {
2336            return 2;
2337        }
2338    } else {
2339        /* invalid character */
2340        return 2;
2341    }
2342
2343    *ochar = c;
2344    *olen = l;
2345    return -1;
2346}
2347
2348static int decode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2349                        bool enh_check, uintptr_t ra,
2350                        uint32_t *ochar, uint32_t *olen)
2351{
2352    uint16_t s0, s1;
2353    uint32_t c, l;
2354
2355    if (ilen < 2) {
2356        return 0;
2357    }
2358    s0 = cpu_lduw_data_ra(env, addr, ra);
2359    if ((s0 & 0xfc00) != 0xd800) {
2360        /* one word character */
2361        l = 2;
2362        c = s0;
2363    } else {
2364        /* two word character */
2365        l = 4;
2366        if (ilen < 4) {
2367            return 0;
2368        }
2369        s1 = cpu_lduw_data_ra(env, addr + 2, ra);
2370        c = extract32(s0, 6, 4) + 1;
2371        c = (c << 6) | (s0 & 0x3f);
2372        c = (c << 10) | (s1 & 0x3ff);
2373        if (enh_check && (s1 & 0xfc00) != 0xdc00) {
2374            /* invalid surrogate character */
2375            return 2;
2376        }
2377    }
2378
2379    *ochar = c;
2380    *olen = l;
2381    return -1;
2382}
2383
2384static int decode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2385                        bool enh_check, uintptr_t ra,
2386                        uint32_t *ochar, uint32_t *olen)
2387{
2388    uint32_t c;
2389
2390    if (ilen < 4) {
2391        return 0;
2392    }
2393    c = cpu_ldl_data_ra(env, addr, ra);
2394    if ((c >= 0xd800 && c <= 0xdbff) || c > 0x10ffff) {
2395        /* invalid unicode character */
2396        return 2;
2397    }
2398
2399    *ochar = c;
2400    *olen = 4;
2401    return -1;
2402}
2403
2404static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2405                       uintptr_t ra, uint32_t c, uint32_t *olen)
2406{
2407    uint8_t d[4];
2408    uint32_t l, i;
2409
2410    if (c <= 0x7f) {
2411        /* one byte character */
2412        l = 1;
2413        d[0] = c;
2414    } else if (c <= 0x7ff) {
2415        /* two byte character */
2416        l = 2;
2417        d[1] = 0x80 | extract32(c, 0, 6);
2418        d[0] = 0xc0 | extract32(c, 6, 5);
2419    } else if (c <= 0xffff) {
2420        /* three byte character */
2421        l = 3;
2422        d[2] = 0x80 | extract32(c, 0, 6);
2423        d[1] = 0x80 | extract32(c, 6, 6);
2424        d[0] = 0xe0 | extract32(c, 12, 4);
2425    } else {
2426        /* four byte character */
2427        l = 4;
2428        d[3] = 0x80 | extract32(c, 0, 6);
2429        d[2] = 0x80 | extract32(c, 6, 6);
2430        d[1] = 0x80 | extract32(c, 12, 6);
2431        d[0] = 0xf0 | extract32(c, 18, 3);
2432    }
2433
2434    if (ilen < l) {
2435        return 1;
2436    }
2437    for (i = 0; i < l; ++i) {
2438        cpu_stb_data_ra(env, addr + i, d[i], ra);
2439    }
2440
2441    *olen = l;
2442    return -1;
2443}
2444
/* Encode code point C as UTF-16 into at most ILEN bytes at ADDR.
   Returns < 0 on success with the byte count in *OLEN, or 1 when the
   destination is exhausted.  */
static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
                        uintptr_t ra, uint32_t c, uint32_t *olen)
{
    uint16_t d0, d1;

    if (c <= 0xffff) {
        /* one word character */
        if (ilen < 2) {
            return 1;
        }
        cpu_stw_data_ra(env, addr, c, ra);
        *olen = 2;
    } else {
        /* two word character: build the surrogate pair field-by-field,
           per the byte layout given in the PoO.  */
        if (ilen < 4) {
            return 1;
        }
        /* low surrogate: 0xdc00 plus the low 10 bits of C */
        d1 = 0xdc00 | extract32(c, 0, 10);
        /* high surrogate: 0xd800 plus bits 10..15 of C ... */
        d0 = 0xd800 | extract32(c, 10, 6);
        /* ... with the plane number minus one in bits 6..9 */
        d0 = deposit32(d0, 6, 4, extract32(c, 16, 5) - 1);
        cpu_stw_data_ra(env, addr + 0, d0, ra);
        cpu_stw_data_ra(env, addr + 2, d1, ra);
        *olen = 4;
    }

    return -1;
}
2472
2473static int encode_utf32(CPUS390XState *env, uint64_t addr, uint64_t ilen,
2474                        uintptr_t ra, uint32_t c, uint32_t *olen)
2475{
2476    if (ilen < 4) {
2477        return 1;
2478    }
2479    cpu_stl_data_ra(env, addr, c, ra);
2480    *olen = 4;
2481    return -1;
2482}
2483
/* Common driver for the CUxx conversion instructions: repeatedly decode
   one character from the R2/R2+1 operand and encode it into the R1/R1+1
   operand, advancing both register pairs.  M3 bit 0 selects the
   enhanced (well-formedness) check.  Returns the CC: whatever the
   decoder/encoder reported (0 source exhausted, 1 destination
   exhausted, 2 invalid character), or 3 if the iteration cap was hit
   with work remaining.  */
static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
                                       uint32_t r2, uint32_t m3, uintptr_t ra,
                                       decode_unicode_fn decode,
                                       encode_unicode_fn encode)
{
    uint64_t dst = get_address(env, r1);
    uint64_t dlen = get_length(env, r1 + 1);
    uint64_t src = get_address(env, r2);
    uint64_t slen = get_length(env, r2 + 1);
    bool enh_check = m3 & 1;
    int cc, i;

    /* Lest we fail to service interrupts in a timely manner, limit the
       amount of work we're willing to do.  For now, let's cap at 256.  */
    for (i = 0; i < 256; ++i) {
        uint32_t c, ilen, olen;

        cc = decode(env, src, slen, enh_check, ra, &c, &ilen);
        if (unlikely(cc >= 0)) {
            break;
        }
        cc = encode(env, dst, dlen, ra, c, &olen);
        if (unlikely(cc >= 0)) {
            break;
        }

        /* Character converted: consume the input, advance the output.  */
        src += ilen;
        slen -= ilen;
        dst += olen;
        dlen -= olen;
        /* If this turns out to be the final iteration of the capped
           loop, report CC 3 (CPU-determined amount processed).  */
        cc = 3;
    }

    /* Write the updated addresses and lengths back to the register
       pairs so the instruction can be resumed.  */
    set_address(env, r1, dst);
    set_length(env, r1 + 1, dlen);
    set_address(env, r2, src);
    set_length(env, r2 + 1, slen);

    return cc;
}
2524
/* CU12: convert UTF-8 to UTF-16 */
uint32_t HELPER(cu12)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf16);
}
2530
/* CU14: convert UTF-8 to UTF-32 */
uint32_t HELPER(cu14)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf8, encode_utf32);
}
2536
/* CU21: convert UTF-16 to UTF-8 */
uint32_t HELPER(cu21)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf8);
}
2542
/* CU24: convert UTF-16 to UTF-32 */
uint32_t HELPER(cu24)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf16, encode_utf32);
}
2548
/* CU41: convert UTF-32 to UTF-8 */
uint32_t HELPER(cu41)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf8);
}
2554
/* CU42: convert UTF-32 to UTF-16 */
uint32_t HELPER(cu42)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t m3)
{
    return convert_unicode(env, r1, r2, m3, GETPC(),
                           decode_utf32, encode_utf16);
}
2560