qemu/accel/tcg/cputlb.c
<<
>>
Prefs
   1/*
   2 *  Common CPU TLB handling
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu/main-loop.h"
  22#include "qemu/target-info.h"
  23#include "accel/tcg/cpu-ops.h"
  24#include "accel/tcg/iommu.h"
  25#include "accel/tcg/probe.h"
  26#include "exec/page-protection.h"
  27#include "system/memory.h"
  28#include "accel/tcg/cpu-ldst-common.h"
  29#include "accel/tcg/cpu-mmu-index.h"
  30#include "exec/cputlb.h"
  31#include "exec/tb-flush.h"
  32#include "system/ram_addr.h"
  33#include "exec/mmu-access-type.h"
  34#include "exec/tlb-common.h"
  35#include "exec/vaddr.h"
  36#include "tcg/tcg.h"
  37#include "qemu/error-report.h"
  38#include "exec/log.h"
  39#include "exec/helper-proto-common.h"
  40#include "exec/tlb-flags.h"
  41#include "qemu/atomic.h"
  42#include "qemu/atomic128.h"
  43#include "tb-internal.h"
  44#include "trace.h"
  45#include "tb-hash.h"
  46#include "tb-internal.h"
  47#include "tlb-bounds.h"
  48#include "internal-common.h"
  49#ifdef CONFIG_PLUGIN
  50#include "qemu/plugin-memory.h"
  51#endif
  52#include "tcg/tcg-ldst.h"
  53#include "backend-ldst.h"
  54
  55
/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

/*
 * Turn the optional DEBUG defines into gates that are always defined as
 * 0 or 1, so that the macros below can test them with a plain "if".
 * Note that DEBUG_TLB_LOG only has an effect when DEBUG_TLB is defined too.
 */
#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
# ifdef DEBUG_TLB_LOG
#  define DEBUG_TLB_LOG_GATE 1
# else
#  define DEBUG_TLB_LOG_GATE 0
# endif
#else
# define DEBUG_TLB_GATE 0
# define DEBUG_TLB_LOG_GATE 0
#endif

/*
 * tlb_debug: printf-style debug message, prefixed with the name of the
 * calling function.  With DEBUG_TLB_LOG_GATE set the message is passed to
 * qemu_log_mask() under CPU_LOG_MMU; otherwise, with DEBUG_TLB_GATE set,
 * it is written to stderr.  With both gates at 0 neither branch is taken.
 */
#define tlb_debug(fmt, ...) do { \
    if (DEBUG_TLB_LOG_GATE) { \
        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                      ## __VA_ARGS__); \
    } else if (DEBUG_TLB_GATE) { \
        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
    } \
} while (0)

/*
 * assert_cpu_is_self: when DEBUG_TLB_GATE is set, assert that either
 * (cpu)->created is false or qemu_cpu_is_self(cpu) holds.
 * Note that @cpu is evaluated twice.
 */
#define assert_cpu_is_self(cpu) do {                              \
        if (DEBUG_TLB_GATE) {                                     \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu));   \
        }                                                         \
    } while (0)
  86
/*
 * run_on_cpu_data.target_ptr should always be big enough for a
 * vaddr even on 32 bit builds.
 */
QEMU_BUILD_BUG_ON(sizeof(vaddr) > sizeof(run_on_cpu_data));

/*
 * We currently can't handle more than 16 bits in the MMUIDX bitmask:
 * the idxmap arguments in this file are carried in a uint16_t.
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
/* Bitmask with one bit set for each of the NB_MMU_MODES mmu indexes. */
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
  96
  97static inline size_t tlb_n_entries(CPUTLBDescFast *fast)
  98{
  99    return (fast->mask >> CPU_TLB_ENTRY_BITS) + 1;
 100}
 101
 102static inline size_t sizeof_tlb(CPUTLBDescFast *fast)
 103{
 104    return fast->mask + (1 << CPU_TLB_ENTRY_BITS);
 105}
 106
 107static inline uint64_t tlb_read_idx(const CPUTLBEntry *entry,
 108                                    MMUAccessType access_type)
 109{
 110    /* Do not rearrange the CPUTLBEntry structure members. */
 111    QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_read) !=
 112                      MMU_DATA_LOAD * sizeof(uintptr_t));
 113    QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) !=
 114                      MMU_DATA_STORE * sizeof(uintptr_t));
 115    QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) !=
 116                      MMU_INST_FETCH * sizeof(uintptr_t));
 117
 118    const uintptr_t *ptr = &entry->addr_idx[access_type];
 119    /* ofs might correspond to .addr_write, so use qatomic_read */
 120    return qatomic_read(ptr);
 121}
 122
 123static inline uint64_t tlb_addr_write(const CPUTLBEntry *entry)
 124{
 125    return tlb_read_idx(entry, MMU_DATA_STORE);
 126}
 127
 128/* Find the TLB index corresponding to the mmu_idx + address pair.  */
 129static inline uintptr_t tlb_index(CPUState *cpu, uintptr_t mmu_idx,
 130                                  vaddr addr)
 131{
 132    uintptr_t size_mask = cpu->neg.tlb.f[mmu_idx].mask >> CPU_TLB_ENTRY_BITS;
 133
 134    return (addr >> TARGET_PAGE_BITS) & size_mask;
 135}
 136
 137/* Find the TLB entry corresponding to the mmu_idx + address pair.  */
 138static inline CPUTLBEntry *tlb_entry(CPUState *cpu, uintptr_t mmu_idx,
 139                                     vaddr addr)
 140{
 141    return &cpu->neg.tlb.f[mmu_idx].table[tlb_index(cpu, mmu_idx, addr)];
 142}
 143
 144static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
 145                             size_t max_entries)
 146{
 147    desc->window_begin_ns = ns;
 148    desc->window_max_entries = max_entries;
 149}
 150
 151static void tb_jmp_cache_clear_page(CPUState *cpu, vaddr page_addr)
 152{
 153    CPUJumpCache *jc = cpu->tb_jmp_cache;
 154    int i, i0;
 155
 156    if (unlikely(!jc)) {
 157        return;
 158    }
 159
 160    i0 = tb_jmp_cache_hash_page(page_addr);
 161    for (i = 0; i < TB_JMP_PAGE_SIZE; i++) {
 162        qatomic_set(&jc->array[i0 + i].tb, NULL);
 163    }
 164}
 165
 166/**
 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
 * @desc: The CPUTLBDesc portion of the TLB
 * @fast: The CPUTLBDescFast portion of the same TLB
 * @now: The current time in nanoseconds, compared against the start of
 *       the current observation window
 *
 * Called with tlb_c.lock held.
 172 *
 173 * We have two main constraints when resizing a TLB: (1) we only resize it
 174 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
 175 * the array or unnecessarily flushing it), which means we do not control how
 176 * frequently the resizing can occur; (2) we don't have access to the guest's
 177 * future scheduling decisions, and therefore have to decide the magnitude of
 178 * the resize based on past observations.
 179 *
 180 * In general, a memory-hungry process can benefit greatly from an appropriately
 181 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
 182 * we just have to make the TLB as large as possible; while an oversized TLB
 183 * results in minimal TLB miss rates, it also takes longer to be flushed
 184 * (flushes can be _very_ frequent), and the reduced locality can also hurt
 185 * performance.
 186 *
 187 * To achieve near-optimal performance for all kinds of workloads, we:
 188 *
 189 * 1. Aggressively increase the size of the TLB when the use rate of the
 190 * TLB being flushed is high, since it is likely that in the near future this
 191 * memory-hungry process will execute again, and its memory hungriness will
 192 * probably be similar.
 193 *
 194 * 2. Slowly reduce the size of the TLB as the use rate declines over a
 195 * reasonably large time window. The rationale is that if in such a time window
 196 * we have not observed a high TLB use rate, it is likely that we won't observe
 197 * it in the near future. In that case, once a time window expires we downsize
 198 * the TLB to match the maximum use rate observed in the window.
 199 *
 200 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
 201 * since in that range performance is likely near-optimal. Recall that the TLB
 202 * is direct mapped, so we want the use rate to be low (or at least not too
 203 * high), since otherwise we are likely to have a significant amount of
 204 * conflict misses.
 205 */
static void tlb_mmu_resize_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast,
                                  int64_t now)
{
    size_t old_size = tlb_n_entries(fast);
    size_t rate;
    size_t new_size = old_size;
    int64_t window_len_ms = 100;
    int64_t window_len_ns = window_len_ms * 1000 * 1000;
    bool window_expired = now > desc->window_begin_ns + window_len_ns;

    /* Track the largest number of used entries seen in this window. */
    if (desc->n_used_entries > desc->window_max_entries) {
        desc->window_max_entries = desc->n_used_entries;
    }
    /* Use rate, in percent, of the current table at that maximum. */
    rate = desc->window_max_entries * 100 / old_size;

    if (rate > 70) {
        /* Grow immediately, up to the largest supported size. */
        new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
    } else if (rate < 30 && window_expired) {
        /* Shrink only once a whole window has passed at a low use rate. */
        size_t ceil = pow2ceil(desc->window_max_entries);
        size_t expected_rate = desc->window_max_entries * 100 / ceil;

        /*
         * Avoid undersizing when the max number of entries seen is just below
         * a pow2. For instance, if max_entries == 1025, the expected use rate
         * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
         * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
         * later. Thus, make sure that the expected use rate remains below 70%.
         * (and since we double the size, that means the lowest rate we'd
         * expect to get is 35%, which is still in the 30-70% range where
         * we consider that the size is appropriate.)
         */
        if (expected_rate > 70) {
            ceil *= 2;
        }
        new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
    }

    if (new_size == old_size) {
        /* No resize: just roll the window over if it has expired. */
        if (window_expired) {
            tlb_window_reset(desc, now, desc->n_used_entries);
        }
        return;
    }

    /* The old contents are not preserved across a resize. */
    g_free(fast->table);
    g_free(desc->fulltlb);

    tlb_window_reset(desc, now, 0);
    /* desc->n_used_entries is cleared by the caller */
    fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
    fast->table = g_try_new(CPUTLBEntry, new_size);
    desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);

    /*
     * If the allocations fail, try smaller sizes. We just freed some
     * memory, so going back to half of new_size has a good chance of working.
     * Increased memory pressure elsewhere in the system might cause the
     * allocations to fail though, so we progressively reduce the allocation
     * size, aborting if we cannot even allocate the smallest TLB we support.
     */
    while (fast->table == NULL || desc->fulltlb == NULL) {
        if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
            error_report("%s: %s", __func__, strerror(errno));
            abort();
        }
        new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
        fast->mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;

        /* One of the two may have succeeded; release both and retry. */
        g_free(fast->table);
        g_free(desc->fulltlb);
        fast->table = g_try_new(CPUTLBEntry, new_size);
        desc->fulltlb = g_try_new(CPUTLBEntryFull, new_size);
    }
}
 280
 281static void tlb_mmu_flush_locked(CPUTLBDesc *desc, CPUTLBDescFast *fast)
 282{
 283    desc->n_used_entries = 0;
 284    desc->large_page_addr = -1;
 285    desc->large_page_mask = -1;
 286    desc->vindex = 0;
 287    memset(fast->table, -1, sizeof_tlb(fast));
 288    memset(desc->vtable, -1, sizeof(desc->vtable));
 289}
 290
 291static void tlb_flush_one_mmuidx_locked(CPUState *cpu, int mmu_idx,
 292                                        int64_t now)
 293{
 294    CPUTLBDesc *desc = &cpu->neg.tlb.d[mmu_idx];
 295    CPUTLBDescFast *fast = &cpu->neg.tlb.f[mmu_idx];
 296
 297    tlb_mmu_resize_locked(desc, fast, now);
 298    tlb_mmu_flush_locked(desc, fast);
 299}
 300
 301static void tlb_mmu_init(CPUTLBDesc *desc, CPUTLBDescFast *fast, int64_t now)
 302{
 303    size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;
 304
 305    tlb_window_reset(desc, now, 0);
 306    desc->n_used_entries = 0;
 307    fast->mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
 308    fast->table = g_new(CPUTLBEntry, n_entries);
 309    desc->fulltlb = g_new(CPUTLBEntryFull, n_entries);
 310    tlb_mmu_flush_locked(desc, fast);
 311}
 312
 313static inline void tlb_n_used_entries_inc(CPUState *cpu, uintptr_t mmu_idx)
 314{
 315    cpu->neg.tlb.d[mmu_idx].n_used_entries++;
 316}
 317
 318static inline void tlb_n_used_entries_dec(CPUState *cpu, uintptr_t mmu_idx)
 319{
 320    cpu->neg.tlb.d[mmu_idx].n_used_entries--;
 321}
 322
 323void tlb_init(CPUState *cpu)
 324{
 325    int64_t now = get_clock_realtime();
 326    int i;
 327
 328    qemu_spin_init(&cpu->neg.tlb.c.lock);
 329
 330    /* All tlbs are initialized flushed. */
 331    cpu->neg.tlb.c.dirty = 0;
 332
 333    for (i = 0; i < NB_MMU_MODES; i++) {
 334        tlb_mmu_init(&cpu->neg.tlb.d[i], &cpu->neg.tlb.f[i], now);
 335    }
 336}
 337
 338void tlb_destroy(CPUState *cpu)
 339{
 340    int i;
 341
 342    qemu_spin_destroy(&cpu->neg.tlb.c.lock);
 343    for (i = 0; i < NB_MMU_MODES; i++) {
 344        CPUTLBDesc *desc = &cpu->neg.tlb.d[i];
 345        CPUTLBDescFast *fast = &cpu->neg.tlb.f[i];
 346
 347        g_free(fast->table);
 348        g_free(desc->fulltlb);
 349    }
 350}
 351
 352/* flush_all_helper: run fn across all cpus
 353 *
 354 * If the wait flag is set then the src cpu's helper will be queued as
 355 * "safe" work and the loop exited creating a synchronisation point
 356 * where all queued work will be finished before execution starts
 357 * again.
 358 */
 359static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
 360                             run_on_cpu_data d)
 361{
 362    CPUState *cpu;
 363
 364    CPU_FOREACH(cpu) {
 365        if (cpu != src) {
 366            async_run_on_cpu(cpu, fn, d);
 367        }
 368    }
 369}
 370
 371static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
 372{
 373    uint16_t asked = data.host_int;
 374    uint16_t all_dirty, work, to_clean;
 375    int64_t now = get_clock_realtime();
 376
 377    assert_cpu_is_self(cpu);
 378
 379    tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
 380
 381    qemu_spin_lock(&cpu->neg.tlb.c.lock);
 382
 383    all_dirty = cpu->neg.tlb.c.dirty;
 384    to_clean = asked & all_dirty;
 385    all_dirty &= ~to_clean;
 386    cpu->neg.tlb.c.dirty = all_dirty;
 387
 388    for (work = to_clean; work != 0; work &= work - 1) {
 389        int mmu_idx = ctz32(work);
 390        tlb_flush_one_mmuidx_locked(cpu, mmu_idx, now);
 391    }
 392
 393    qemu_spin_unlock(&cpu->neg.tlb.c.lock);
 394
 395    tcg_flush_jmp_cache(cpu);
 396
 397    if (to_clean == ALL_MMUIDX_BITS) {
 398        qatomic_set(&cpu->neg.tlb.c.full_flush_count,
 399                    cpu->neg.tlb.c.full_flush_count + 1);
 400    } else {
 401        qatomic_set(&cpu->neg.tlb.c.part_flush_count,
 402                    cpu->neg.tlb.c.part_flush_count + ctpop16(to_clean));
 403        if (to_clean != asked) {
 404            qatomic_set(&cpu->neg.tlb.c.elide_flush_count,
 405                        cpu->neg.tlb.c.elide_flush_count +
 406                        ctpop16(asked & ~to_clean));
 407        }
 408    }
 409}
 410
 411void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
 412{
 413    tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
 414
 415    assert_cpu_is_self(cpu);
 416
 417    tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
 418}
 419
 420void tlb_flush(CPUState *cpu)
 421{
 422    tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
 423}
 424
 425void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
 426{
 427    const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
 428
 429    tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
 430
 431    flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
 432    async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
 433}
 434
 435void tlb_flush_all_cpus_synced(CPUState *src_cpu)
 436{
 437    tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
 438}
 439
 440static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
 441                                      vaddr page, vaddr mask)
 442{
 443    page &= mask;
 444    mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK;
 445
 446    return (page == (tlb_entry->addr_read & mask) ||
 447            page == (tlb_addr_write(tlb_entry) & mask) ||
 448            page == (tlb_entry->addr_code & mask));
 449}
 450
 451static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry, vaddr page)
 452{
 453    return tlb_hit_page_mask_anyprot(tlb_entry, page, -1);
 454}
 455
 456/**
 457 * tlb_entry_is_empty - return true if the entry is not in use
 458 * @te: pointer to CPUTLBEntry
 459 */
 460static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
 461{
 462    return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
 463}
 464
 465/* Called with tlb_c.lock held */
 466static bool tlb_flush_entry_mask_locked(CPUTLBEntry *tlb_entry,
 467                                        vaddr page,
 468                                        vaddr mask)
 469{
 470    if (tlb_hit_page_mask_anyprot(tlb_entry, page, mask)) {
 471        memset(tlb_entry, -1, sizeof(*tlb_entry));
 472        return true;
 473    }
 474    return false;
 475}
 476
 477static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry, vaddr page)
 478{
 479    return tlb_flush_entry_mask_locked(tlb_entry, page, -1);
 480}
 481
 482/* Called with tlb_c.lock held */
 483static void tlb_flush_vtlb_page_mask_locked(CPUState *cpu, int mmu_idx,
 484                                            vaddr page,
 485                                            vaddr mask)
 486{
 487    CPUTLBDesc *d = &cpu->neg.tlb.d[mmu_idx];
 488    int k;
 489
 490    assert_cpu_is_self(cpu);
 491    for (k = 0; k < CPU_VTLB_SIZE; k++) {
 492        if (tlb_flush_entry_mask_locked(&d->vtable[k], page, mask)) {
 493            tlb_n_used_entries_dec(cpu, mmu_idx);
 494        }
 495    }
 496}
 497
 498static inline void tlb_flush_vtlb_page_locked(CPUState *cpu, int mmu_idx,
 499                                              vaddr page)
 500{
 501    tlb_flush_vtlb_page_mask_locked(cpu, mmu_idx, page, -1);
 502}
 503
 504static void tlb_flush_page_locked(CPUState *cpu, int midx, vaddr page)
 505{
 506    vaddr lp_addr = cpu->neg.tlb.d[midx].large_page_addr;
 507    vaddr lp_mask = cpu->neg.tlb.d[midx].large_page_mask;
 508
 509    /* Check if we need to flush due to large pages.  */
 510    if ((page & lp_mask) == lp_addr) {
 511        tlb_debug("forcing full flush midx %d (%016"
 512                  VADDR_PRIx "/%016" VADDR_PRIx ")\n",
 513                  midx, lp_addr, lp_mask);
 514        tlb_flush_one_mmuidx_locked(cpu, midx, get_clock_realtime());
 515    } else {
 516        if (tlb_flush_entry_locked(tlb_entry(cpu, midx, page), page)) {
 517            tlb_n_used_entries_dec(cpu, midx);
 518        }
 519        tlb_flush_vtlb_page_locked(cpu, midx, page);
 520    }
 521}
 522
 523/**
 524 * tlb_flush_page_by_mmuidx_async_0:
 525 * @cpu: cpu on which to flush
 526 * @addr: page of virtual address to flush
 527 * @idxmap: set of mmu_idx to flush
 528 *
 529 * Helper for tlb_flush_page_by_mmuidx and friends, flush one page
 530 * at @addr from the tlbs indicated by @idxmap from @cpu.
 531 */
 532static void tlb_flush_page_by_mmuidx_async_0(CPUState *cpu,
 533                                             vaddr addr,
 534                                             uint16_t idxmap)
 535{
 536    int mmu_idx;
 537
 538    assert_cpu_is_self(cpu);
 539
 540    tlb_debug("page addr: %016" VADDR_PRIx " mmu_map:0x%x\n", addr, idxmap);
 541
 542    qemu_spin_lock(&cpu->neg.tlb.c.lock);
 543    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 544        if ((idxmap >> mmu_idx) & 1) {
 545            tlb_flush_page_locked(cpu, mmu_idx, addr);
 546        }
 547    }
 548    qemu_spin_unlock(&cpu->neg.tlb.c.lock);
 549
 550    /*
 551     * Discard jump cache entries for any tb which might potentially
 552     * overlap the flushed page, which includes the previous.
 553     */
 554    tb_jmp_cache_clear_page(cpu, addr - TARGET_PAGE_SIZE);
 555    tb_jmp_cache_clear_page(cpu, addr);
 556}
 557
 558/**
 559 * tlb_flush_page_by_mmuidx_async_1:
 560 * @cpu: cpu on which to flush
 561 * @data: encoded addr + idxmap
 562 *
 563 * Helper for tlb_flush_page_by_mmuidx and friends, called through
 564 * async_run_on_cpu.  The idxmap parameter is encoded in the page
 565 * offset of the target_ptr field.  This limits the set of mmu_idx
 566 * that can be passed via this method.
 567 */
 568static void tlb_flush_page_by_mmuidx_async_1(CPUState *cpu,
 569                                             run_on_cpu_data data)
 570{
 571    vaddr addr_and_idxmap = data.target_ptr;
 572    vaddr addr = addr_and_idxmap & TARGET_PAGE_MASK;
 573    uint16_t idxmap = addr_and_idxmap & ~TARGET_PAGE_MASK;
 574
 575    tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
 576}
 577
/*
 * Argument block for tlb_flush_page_by_mmuidx_async_2, used when the
 * idxmap cannot be packed into the page offset bits of the address.
 */
typedef struct {
    vaddr addr;       /* page address to flush */
    uint16_t idxmap;  /* set of mmu_idx to flush */
} TLBFlushPageByMMUIdxData;
 582
 583/**
 584 * tlb_flush_page_by_mmuidx_async_2:
 585 * @cpu: cpu on which to flush
 586 * @data: allocated addr + idxmap
 587 *
 588 * Helper for tlb_flush_page_by_mmuidx and friends, called through
 589 * async_run_on_cpu.  The addr+idxmap parameters are stored in a
 590 * TLBFlushPageByMMUIdxData structure that has been allocated
 591 * specifically for this helper.  Free the structure when done.
 592 */
 593static void tlb_flush_page_by_mmuidx_async_2(CPUState *cpu,
 594                                             run_on_cpu_data data)
 595{
 596    TLBFlushPageByMMUIdxData *d = data.host_ptr;
 597
 598    tlb_flush_page_by_mmuidx_async_0(cpu, d->addr, d->idxmap);
 599    g_free(d);
 600}
 601
 602void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, uint16_t idxmap)
 603{
 604    tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap);
 605
 606    assert_cpu_is_self(cpu);
 607
 608    /* This should already be page aligned */
 609    addr &= TARGET_PAGE_MASK;
 610
 611    tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
 612}
 613
 614void tlb_flush_page(CPUState *cpu, vaddr addr)
 615{
 616    tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
 617}
 618
 619void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
 620                                              vaddr addr,
 621                                              uint16_t idxmap)
 622{
 623    tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%"PRIx16"\n", addr, idxmap);
 624
 625    /* This should already be page aligned */
 626    addr &= TARGET_PAGE_MASK;
 627
 628    /*
 629     * Allocate memory to hold addr+idxmap only when needed.
 630     * See tlb_flush_page_by_mmuidx for details.
 631     */
 632    if (idxmap < TARGET_PAGE_SIZE) {
 633        flush_all_helper(src_cpu, tlb_flush_page_by_mmuidx_async_1,
 634                         RUN_ON_CPU_TARGET_PTR(addr | idxmap));
 635        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_1,
 636                              RUN_ON_CPU_TARGET_PTR(addr | idxmap));
 637    } else {
 638        CPUState *dst_cpu;
 639        TLBFlushPageByMMUIdxData *d;
 640
 641        /* Allocate a separate data block for each destination cpu.  */
 642        CPU_FOREACH(dst_cpu) {
 643            if (dst_cpu != src_cpu) {
 644                d = g_new(TLBFlushPageByMMUIdxData, 1);
 645                d->addr = addr;
 646                d->idxmap = idxmap;
 647                async_run_on_cpu(dst_cpu, tlb_flush_page_by_mmuidx_async_2,
 648                                 RUN_ON_CPU_HOST_PTR(d));
 649            }
 650        }
 651
 652        d = g_new(TLBFlushPageByMMUIdxData, 1);
 653        d->addr = addr;
 654        d->idxmap = idxmap;
 655        async_safe_run_on_cpu(src_cpu, tlb_flush_page_by_mmuidx_async_2,
 656                              RUN_ON_CPU_HOST_PTR(d));
 657    }
 658}
 659
 660void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr addr)
 661{
 662    tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
 663}
 664
 665static void tlb_flush_range_locked(CPUState *cpu, int midx,
 666                                   vaddr addr, vaddr len,
 667                                   unsigned bits)
 668{
 669    CPUTLBDesc *d = &cpu->neg.tlb.d[midx];
 670    CPUTLBDescFast *f = &cpu->neg.tlb.f[midx];
 671    vaddr mask = MAKE_64BIT_MASK(0, bits);
 672
 673    /*
 674     * If @bits is smaller than the tlb size, there may be multiple entries
 675     * within the TLB; otherwise all addresses that match under @mask hit
 676     * the same TLB entry.
 677     * TODO: Perhaps allow bits to be a few bits less than the size.
 678     * For now, just flush the entire TLB.
 679     *
 680     * If @len is larger than the tlb size, then it will take longer to
 681     * test all of the entries in the TLB than it will to flush it all.
 682     */
 683    if (mask < f->mask || len > f->mask) {
 684        tlb_debug("forcing full flush midx %d ("
 685                  "%016" VADDR_PRIx "/%016" VADDR_PRIx "+%016" VADDR_PRIx ")\n",
 686                  midx, addr, mask, len);
 687        tlb_flush_one_mmuidx_locked(cpu, midx, get_clock_realtime());
 688        return;
 689    }
 690
 691    /*
 692     * Check if we need to flush due to large pages.
 693     * Because large_page_mask contains all 1's from the msb,
 694     * we only need to test the end of the range.
 695     */
 696    if (((addr + len - 1) & d->large_page_mask) == d->large_page_addr) {
 697        tlb_debug("forcing full flush midx %d ("
 698                  "%016" VADDR_PRIx "/%016" VADDR_PRIx ")\n",
 699                  midx, d->large_page_addr, d->large_page_mask);
 700        tlb_flush_one_mmuidx_locked(cpu, midx, get_clock_realtime());
 701        return;
 702    }
 703
 704    for (vaddr i = 0; i < len; i += TARGET_PAGE_SIZE) {
 705        vaddr page = addr + i;
 706        CPUTLBEntry *entry = tlb_entry(cpu, midx, page);
 707
 708        if (tlb_flush_entry_mask_locked(entry, page, mask)) {
 709            tlb_n_used_entries_dec(cpu, midx);
 710        }
 711        tlb_flush_vtlb_page_mask_locked(cpu, midx, page, mask);
 712    }
 713}
 714
/* Argument block for the tlb_flush_range_by_mmuidx_async_* workers. */
typedef struct {
    vaddr addr;       /* start of the range to flush */
    vaddr len;        /* length of the range */
    uint16_t idxmap;  /* set of mmu_idx to flush */
    uint16_t bits;    /* number of significant low address bits */
} TLBFlushRangeData;
 721
 722static void tlb_flush_range_by_mmuidx_async_0(CPUState *cpu,
 723                                              TLBFlushRangeData d)
 724{
 725    int mmu_idx;
 726
 727    assert_cpu_is_self(cpu);
 728
 729    tlb_debug("range: %016" VADDR_PRIx "/%u+%016" VADDR_PRIx " mmu_map:0x%x\n",
 730              d.addr, d.bits, d.len, d.idxmap);
 731
 732    qemu_spin_lock(&cpu->neg.tlb.c.lock);
 733    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 734        if ((d.idxmap >> mmu_idx) & 1) {
 735            tlb_flush_range_locked(cpu, mmu_idx, d.addr, d.len, d.bits);
 736        }
 737    }
 738    qemu_spin_unlock(&cpu->neg.tlb.c.lock);
 739
 740    /*
 741     * If the length is larger than the jump cache size, then it will take
 742     * longer to clear each entry individually than it will to clear it all.
 743     */
 744    if (d.len >= (TARGET_PAGE_SIZE * TB_JMP_CACHE_SIZE)) {
 745        tcg_flush_jmp_cache(cpu);
 746        return;
 747    }
 748
 749    /*
 750     * Discard jump cache entries for any tb which might potentially
 751     * overlap the flushed pages, which includes the previous.
 752     */
 753    d.addr -= TARGET_PAGE_SIZE;
 754    for (vaddr i = 0, n = d.len / TARGET_PAGE_SIZE + 1; i < n; i++) {
 755        tb_jmp_cache_clear_page(cpu, d.addr);
 756        d.addr += TARGET_PAGE_SIZE;
 757    }
 758}
 759
 760static void tlb_flush_range_by_mmuidx_async_1(CPUState *cpu,
 761                                              run_on_cpu_data data)
 762{
 763    TLBFlushRangeData *d = data.host_ptr;
 764    tlb_flush_range_by_mmuidx_async_0(cpu, *d);
 765    g_free(d);
 766}
 767
 768void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
 769                               vaddr len, uint16_t idxmap,
 770                               unsigned bits)
 771{
 772    TLBFlushRangeData d;
 773
 774    assert_cpu_is_self(cpu);
 775
 776    /* If no page bits are significant, this devolves to tlb_flush. */
 777    if (bits < TARGET_PAGE_BITS) {
 778        tlb_flush_by_mmuidx(cpu, idxmap);
 779        return;
 780    }
 781    /*
 782     * If all bits are significant, and len is small,
 783     * this devolves to tlb_flush_page.
 784     */
 785    if (len <= TARGET_PAGE_SIZE && bits >= target_long_bits()) {
 786        tlb_flush_page_by_mmuidx(cpu, addr, idxmap);
 787        return;
 788    }
 789
 790    /* This should already be page aligned */
 791    d.addr = addr & TARGET_PAGE_MASK;
 792    d.len = len;
 793    d.idxmap = idxmap;
 794    d.bits = bits;
 795
 796    tlb_flush_range_by_mmuidx_async_0(cpu, d);
 797}
 798
 799void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr,
 800                                   uint16_t idxmap, unsigned bits)
 801{
 802    tlb_flush_range_by_mmuidx(cpu, addr, TARGET_PAGE_SIZE, idxmap, bits);
 803}
 804
 805void tlb_flush_range_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
 806                                               vaddr addr,
 807                                               vaddr len,
 808                                               uint16_t idxmap,
 809                                               unsigned bits)
 810{
 811    TLBFlushRangeData d, *p;
 812    CPUState *dst_cpu;
 813
 814    /* If no page bits are significant, this devolves to tlb_flush. */
 815    if (bits < TARGET_PAGE_BITS) {
 816        tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, idxmap);
 817        return;
 818    }
 819    /*
 820     * If all bits are significant, and len is small,
 821     * this devolves to tlb_flush_page.
 822     */
 823    if (len <= TARGET_PAGE_SIZE && bits >= target_long_bits()) {
 824        tlb_flush_page_by_mmuidx_all_cpus_synced(src_cpu, addr, idxmap);
 825        return;
 826    }
 827
 828    /* This should already be page aligned */
 829    d.addr = addr & TARGET_PAGE_MASK;
 830    d.len = len;
 831    d.idxmap = idxmap;
 832    d.bits = bits;
 833
 834    /* Allocate a separate data block for each destination cpu.  */
 835    CPU_FOREACH(dst_cpu) {
 836        if (dst_cpu != src_cpu) {
 837            p = g_memdup(&d, sizeof(d));
 838            async_run_on_cpu(dst_cpu, tlb_flush_range_by_mmuidx_async_1,
 839                             RUN_ON_CPU_HOST_PTR(p));
 840        }
 841    }
 842
 843    p = g_memdup(&d, sizeof(d));
 844    async_safe_run_on_cpu(src_cpu, tlb_flush_range_by_mmuidx_async_1,
 845                          RUN_ON_CPU_HOST_PTR(p));
 846}
 847
 848void tlb_flush_page_bits_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
 849                                                   vaddr addr,
 850                                                   uint16_t idxmap,
 851                                                   unsigned bits)
 852{
 853    tlb_flush_range_by_mmuidx_all_cpus_synced(src_cpu, addr, TARGET_PAGE_SIZE,
 854                                              idxmap, bits);
 855}
 856
 857/* update the TLBs so that writes to code in the virtual page 'addr'
 858   can be detected */
 859void tlb_protect_code(ram_addr_t ram_addr)
 860{
 861    cpu_physical_memory_test_and_clear_dirty(ram_addr & TARGET_PAGE_MASK,
 862                                             TARGET_PAGE_SIZE,
 863                                             DIRTY_MEMORY_CODE);
 864}
 865
 866/* update the TLB so that writes in physical page 'phys_addr' are no longer
 867   tested for self modifying code */
 868void tlb_unprotect_code(ram_addr_t ram_addr)
 869{
 870    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
 871}
 872
 873
 874/*
 875 * Dirty write flag handling
 876 *
 877 * When the TCG code writes to a location it looks up the address in
 878 * the TLB and uses that data to compute the final address. If any of
 879 * the lower bits of the address are set then the slow path is forced.
 880 * There are a number of reasons to do this but for normal RAM the
 881 * most usual is detecting writes to code regions which may invalidate
 882 * generated code.
 883 *
 884 * Other vCPUs might be reading their TLBs during guest execution, so we update
 885 * te->addr_write with qatomic_set. We don't need to worry about this for
 886 * oversized guests as MTTCG is disabled for them.
 887 *
 888 * Called with tlb_c.lock held.
 889 */
 890static void tlb_reset_dirty_range_locked(CPUTLBEntryFull *full, CPUTLBEntry *ent,
 891                                         uintptr_t start, uintptr_t length)
 892{
 893    const uintptr_t addr = ent->addr_write;
 894    int flags = addr | full->slow_flags[MMU_DATA_STORE];
 895
 896    flags &= TLB_INVALID_MASK | TLB_MMIO | TLB_DISCARD_WRITE | TLB_NOTDIRTY;
 897    if (flags == 0) {
 898        uintptr_t host = (addr & TARGET_PAGE_MASK) + ent->addend;
 899        if ((host - start) < length) {
 900            qatomic_set(&ent->addr_write, addr | TLB_NOTDIRTY);
 901        }
 902    }
 903}
 904
 905/*
 906 * Called with tlb_c.lock held.
 907 * Called only from the vCPU context, i.e. the TLB's owner thread.
 908 */
 909static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
 910{
 911    *d = *s;
 912}
 913
 914/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
 915 * the target vCPU).
 916 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
 917 * thing actually updated is the target TLB entry ->addr_write flags.
 918 */
 919void tlb_reset_dirty(CPUState *cpu, uintptr_t start, uintptr_t length)
 920{
 921    int mmu_idx;
 922
 923    qemu_spin_lock(&cpu->neg.tlb.c.lock);
 924    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 925        CPUTLBDesc *desc = &cpu->neg.tlb.d[mmu_idx];
 926        CPUTLBDescFast *fast = &cpu->neg.tlb.f[mmu_idx];
 927        unsigned int n = tlb_n_entries(fast);
 928        unsigned int i;
 929
 930        for (i = 0; i < n; i++) {
 931            tlb_reset_dirty_range_locked(&desc->fulltlb[i], &fast->table[i],
 932                                         start, length);
 933        }
 934
 935        for (i = 0; i < CPU_VTLB_SIZE; i++) {
 936            tlb_reset_dirty_range_locked(&desc->vfulltlb[i], &desc->vtable[i],
 937                                         start, length);
 938        }
 939    }
 940    qemu_spin_unlock(&cpu->neg.tlb.c.lock);
 941}
 942
 943/* Called with tlb_c.lock held */
 944static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
 945                                         vaddr addr)
 946{
 947    if (tlb_entry->addr_write == (addr | TLB_NOTDIRTY)) {
 948        tlb_entry->addr_write = addr;
 949    }
 950}
 951
 952/* update the TLB corresponding to virtual page vaddr
 953   so that it is no longer dirty */
 954static void tlb_set_dirty(CPUState *cpu, vaddr addr)
 955{
 956    int mmu_idx;
 957
 958    assert_cpu_is_self(cpu);
 959
 960    addr &= TARGET_PAGE_MASK;
 961    qemu_spin_lock(&cpu->neg.tlb.c.lock);
 962    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 963        tlb_set_dirty1_locked(tlb_entry(cpu, mmu_idx, addr), addr);
 964    }
 965
 966    for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
 967        int k;
 968        for (k = 0; k < CPU_VTLB_SIZE; k++) {
 969            tlb_set_dirty1_locked(&cpu->neg.tlb.d[mmu_idx].vtable[k], addr);
 970        }
 971    }
 972    qemu_spin_unlock(&cpu->neg.tlb.c.lock);
 973}
 974
 975/* Our TLB does not support large pages, so remember the area covered by
 976   large pages and trigger a full TLB flush if these are invalidated.  */
 977static void tlb_add_large_page(CPUState *cpu, int mmu_idx,
 978                               vaddr addr, uint64_t size)
 979{
 980    vaddr lp_addr = cpu->neg.tlb.d[mmu_idx].large_page_addr;
 981    vaddr lp_mask = ~(size - 1);
 982
 983    if (lp_addr == (vaddr)-1) {
 984        /* No previous large page.  */
 985        lp_addr = addr;
 986    } else {
 987        /* Extend the existing region to include the new page.
 988           This is a compromise between unnecessary flushes and
 989           the cost of maintaining a full variable size TLB.  */
 990        lp_mask &= cpu->neg.tlb.d[mmu_idx].large_page_mask;
 991        while (((lp_addr ^ addr) & lp_mask) != 0) {
 992            lp_mask <<= 1;
 993        }
 994    }
 995    cpu->neg.tlb.d[mmu_idx].large_page_addr = lp_addr & lp_mask;
 996    cpu->neg.tlb.d[mmu_idx].large_page_mask = lp_mask;
 997}
 998
 999static inline void tlb_set_compare(CPUTLBEntryFull *full, CPUTLBEntry *ent,
1000                                   vaddr address, int flags,
1001                                   MMUAccessType access_type, bool enable)
1002{
1003    if (enable) {
1004        address |= flags & TLB_FLAGS_MASK;
1005        flags &= TLB_SLOW_FLAGS_MASK;
1006        if (flags) {
1007            address |= TLB_FORCE_SLOW;
1008        }
1009    } else {
1010        address = -1;
1011        flags = 0;
1012    }
1013    ent->addr_idx[access_type] = address;
1014    full->slow_flags[access_type] = flags;
1015}
1016
/*
 * Add a new TLB entry. At most one entry for a given virtual address
 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
 * supplied size is only used by tlb_flush_page.
 *
 * @cpu: the vCPU whose TLB is updated; must be the calling vCPU
 *       (assert_cpu_is_self).
 * @mmu_idx: MMU index selecting which TLB of @cpu receives the entry.
 * @addr: virtual address being mapped; it is masked down to its page.
 * @full: description of the mapping.  This function reads phys_addr,
 *        attrs, prot, lg_page_size and tlb_fill_flags from it; the
 *        structure is copied into the TLB, and the TLB's copy (not the
 *        caller's) then has xlat_section and phys_addr rewritten.
 *
 * Called from TCG-generated code, which is under an RCU read-side
 * critical section.
 */
void tlb_set_page_full(CPUState *cpu, int mmu_idx,
                       vaddr addr, CPUTLBEntryFull *full)
{
    CPUTLB *tlb = &cpu->neg.tlb;
    CPUTLBDesc *desc = &tlb->d[mmu_idx];
    MemoryRegionSection *section;
    unsigned int index, read_flags, write_flags;
    uintptr_t addend;
    CPUTLBEntry *te, tn;
    hwaddr iotlb, xlat, sz, paddr_page;
    vaddr addr_page;
    int asidx, wp_flags, prot;
    bool is_ram, is_romd;

    assert_cpu_is_self(cpu);

    /*
     * Anything up to one target page is handled as a single target page;
     * larger mappings are additionally recorded as a large-page region.
     */
    if (full->lg_page_size <= TARGET_PAGE_BITS) {
        sz = TARGET_PAGE_SIZE;
    } else {
        sz = (hwaddr)1 << full->lg_page_size;
        tlb_add_large_page(cpu, mmu_idx, addr, sz);
    }
    addr_page = addr & TARGET_PAGE_MASK;
    paddr_page = full->phys_addr & TARGET_PAGE_MASK;

    /*
     * xlat, sz and prot are passed by pointer, so the translation may
     * update them; the local prot (not full->prot) is used from here on.
     */
    prot = full->prot;
    asidx = cpu_asidx_from_attrs(cpu, full->attrs);
    section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
                                                &xlat, &sz, full->attrs, &prot);
    assert(sz >= TARGET_PAGE_SIZE);

    tlb_debug("vaddr=%016" VADDR_PRIx " paddr=0x" HWADDR_FMT_plx
              " prot=%x idx=%d\n",
              addr, full->phys_addr, prot, mmu_idx);

    read_flags = full->tlb_fill_flags;
    if (full->lg_page_size < TARGET_PAGE_BITS) {
        /* Repeat the MMU check and TLB fill on every access.  */
        read_flags |= TLB_INVALID_MASK;
    }

    is_ram = memory_region_is_ram(section->mr);
    is_romd = memory_region_is_romd(section->mr);

    if (is_ram || is_romd) {
        /* RAM and ROMD both have associated host memory. */
        addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
    } else {
        /* I/O does not; force the host address to NULL. */
        addend = 0;
    }

    /* Stores start from the same flags as loads and diverge below. */
    write_flags = read_flags;
    if (is_ram) {
        iotlb = memory_region_get_ram_addr(section->mr) + xlat;
        assert(!(iotlb & ~TARGET_PAGE_MASK));
        /*
         * Computing is_clean is expensive; avoid all that unless
         * the page is actually writable.
         */
        if (prot & PAGE_WRITE) {
            if (section->readonly) {
                write_flags |= TLB_DISCARD_WRITE;
            } else if (cpu_physical_memory_is_clean(iotlb)) {
                write_flags |= TLB_NOTDIRTY;
            }
        }
    } else {
        /* I/O or ROMD */
        iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
        /*
         * Writes to romd devices must go through MMIO to enable write.
         * Reads to romd devices go through the ram_ptr found above,
         * but of course reads to I/O must go through MMIO.
         */
        write_flags |= TLB_MMIO;
        if (!is_romd) {
            read_flags = write_flags;
        }
    }

    wp_flags = cpu_watchpoint_address_matches(cpu, addr_page,
                                              TARGET_PAGE_SIZE);

    index = tlb_index(cpu, mmu_idx, addr_page);
    te = tlb_entry(cpu, mmu_idx, addr_page);

    /*
     * Hold the TLB lock for the rest of the function. We could acquire/release
     * the lock several times in the function, but it is faster to amortize the
     * acquisition cost by acquiring it just once. Note that this leads to
     * a longer critical section, but this is not a concern since the TLB lock
     * is unlikely to be contended.
     */
    qemu_spin_lock(&tlb->c.lock);

    /* Note that the tlb is no longer clean.  */
    tlb->c.dirty |= 1 << mmu_idx;

    /* Make sure there's no cached translation for the new page.  */
    tlb_flush_vtlb_page_locked(cpu, mmu_idx, addr_page);

    /*
     * Only evict the old entry to the victim tlb if it's for a
     * different page; otherwise just overwrite the stale data.
     */
    if (!tlb_hit_page_anyprot(te, addr_page) && !tlb_entry_is_empty(te)) {
        /* Victim slots are reused round-robin via desc->vindex. */
        unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
        CPUTLBEntry *tv = &desc->vtable[vidx];

        /* Evict the old entry into the victim tlb.  */
        copy_tlb_helper_locked(tv, te);
        desc->vfulltlb[vidx] = desc->fulltlb[index];
        /* Balanced by the tlb_n_used_entries_inc() at the end. */
        tlb_n_used_entries_dec(cpu, mmu_idx);
    }

    /* refill the tlb */
    /*
     * When memory region is ram, iotlb contains a TARGET_PAGE_BITS
     * aligned ram_addr_t of the page base of the target RAM.
     * Otherwise, iotlb contains
     *  - a physical section number in the lower TARGET_PAGE_BITS
     *  - the offset within section->mr of the page base (I/O, ROMD) with the
     *    TARGET_PAGE_BITS masked off.
     * We subtract addr_page (which is page aligned and thus won't
     * disturb the low bits) to give an offset which can be added to the
     * (non-page-aligned) vaddr of the eventual memory access to get
     * the MemoryRegion offset for the access. Note that the vaddr we
     * subtract here is that of the page base, and not the same as the
     * vaddr we add back in io_prepare()/get_page_addr_code().
     */
    desc->fulltlb[index] = *full;
    /* From here on, 'full' refers to the TLB's own copy. */
    full = &desc->fulltlb[index];
    full->xlat_section = iotlb - addr_page;
    full->phys_addr = paddr_page;

    /* Now calculate the new entry */
    /* Biased by addr_page so that guest vaddr + addend is the host address. */
    tn.addend = addend - addr_page;

    /* Instruction fetch uses the read flags without any watchpoint bit. */
    tlb_set_compare(full, &tn, addr_page, read_flags,
                    MMU_INST_FETCH, prot & PAGE_EXEC);

    if (wp_flags & BP_MEM_READ) {
        read_flags |= TLB_WATCHPOINT;
    }
    tlb_set_compare(full, &tn, addr_page, read_flags,
                    MMU_DATA_LOAD, prot & PAGE_READ);

    if (prot & PAGE_WRITE_INV) {
        write_flags |= TLB_INVALID_MASK;
    }
    if (wp_flags & BP_MEM_WRITE) {
        write_flags |= TLB_WATCHPOINT;
    }
    tlb_set_compare(full, &tn, addr_page, write_flags,
                    MMU_DATA_STORE, prot & PAGE_WRITE);

    /* Publish the fully built entry into the main table. */
    copy_tlb_helper_locked(te, &tn);
    tlb_n_used_entries_inc(cpu, mmu_idx);
    qemu_spin_unlock(&tlb->c.lock);
}
1186
1187void tlb_set_page_with_attrs(CPUState *cpu, vaddr addr,
1188                             hwaddr paddr, MemTxAttrs attrs, int prot,
1189                             int mmu_idx, vaddr size)
1190{
1191    CPUTLBEntryFull full = {
1192        .phys_addr = paddr,
1193        .attrs = attrs,
1194        .prot = prot,
1195        .lg_page_size = ctz64(size)
1196    };
1197
1198    assert(is_power_of_2(size));
1199    tlb_set_page_full(cpu, mmu_idx, addr, &full);
1200}
1201
1202void tlb_set_page(CPUState *cpu, vaddr addr,
1203                  hwaddr paddr, int prot,
1204                  int mmu_idx, vaddr size)
1205{
1206    tlb_set_page_with_attrs(cpu, addr, paddr, MEMTXATTRS_UNSPECIFIED,
1207                            prot, mmu_idx, size);
1208}
1209
1210/**
1211 * tlb_hit_page: return true if page aligned @addr is a hit against the
1212 * TLB entry @tlb_addr
1213 *
1214 * @addr: virtual address to test (must be page aligned)
1215 * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value)
1216 */
1217static inline bool tlb_hit_page(uint64_t tlb_addr, vaddr addr)
1218{
1219    return addr == (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK));
1220}
1221
1222/**
1223 * tlb_hit: return true if @addr is a hit against the TLB entry @tlb_addr
1224 *
1225 * @addr: virtual address to test (need not be page aligned)
1226 * @tlb_addr: TLB entry address (a CPUTLBEntry addr_read/write/code value)
1227 */
1228static inline bool tlb_hit(uint64_t tlb_addr, vaddr addr)
1229{
1230    return tlb_hit_page(tlb_addr, addr & TARGET_PAGE_MASK);
1231}
1232
1233/*
1234 * Note: tlb_fill_align() can trigger a resize of the TLB.
1235 * This means that all of the caller's prior references to the TLB table
1236 * (e.g. CPUTLBEntry pointers) must be discarded and looked up again
1237 * (e.g. via tlb_entry()).
1238 */
1239static bool tlb_fill_align(CPUState *cpu, vaddr addr, MMUAccessType type,
1240                           int mmu_idx, MemOp memop, int size,
1241                           bool probe, uintptr_t ra)
1242{
1243    const TCGCPUOps *ops = cpu->cc->tcg_ops;
1244    CPUTLBEntryFull full;
1245
1246    if (ops->tlb_fill_align) {
1247        if (ops->tlb_fill_align(cpu, &full, addr, type, mmu_idx,
1248                                memop, size, probe, ra)) {
1249            tlb_set_page_full(cpu, mmu_idx, addr, &full);
1250            return true;
1251        }
1252    } else {
1253        /* Legacy behaviour is alignment before paging. */
1254        if (addr & ((1u << memop_alignment_bits(memop)) - 1)) {
1255            ops->do_unaligned_access(cpu, addr, type, mmu_idx, ra);
1256        }
1257        if (ops->tlb_fill(cpu, addr, size, type, mmu_idx, probe, ra)) {
1258            return true;
1259        }
1260    }
1261    assert(probe);
1262    return false;
1263}
1264
1265static inline void cpu_unaligned_access(CPUState *cpu, vaddr addr,
1266                                        MMUAccessType access_type,
1267                                        int mmu_idx, uintptr_t retaddr)
1268{
1269    cpu->cc->tcg_ops->do_unaligned_access(cpu, addr, access_type,
1270                                          mmu_idx, retaddr);
1271}
1272
1273static MemoryRegionSection *
1274io_prepare(hwaddr *out_offset, CPUState *cpu, hwaddr xlat,
1275           MemTxAttrs attrs, vaddr addr, uintptr_t retaddr)
1276{
1277    MemoryRegionSection *section;
1278    hwaddr mr_offset;
1279
1280    section = iotlb_to_section(cpu, xlat, attrs);
1281    mr_offset = (xlat & TARGET_PAGE_MASK) + addr;
1282    cpu->mem_io_pc = retaddr;
1283    if (!cpu->neg.can_do_io) {
1284        cpu_io_recompile(cpu, retaddr);
1285    }
1286
1287    *out_offset = mr_offset;
1288    return section;
1289}
1290
1291static void io_failed(CPUState *cpu, CPUTLBEntryFull *full, vaddr addr,
1292                      unsigned size, MMUAccessType access_type, int mmu_idx,
1293                      MemTxResult response, uintptr_t retaddr)
1294{
1295    if (!cpu->ignore_memory_transaction_failures
1296        && cpu->cc->tcg_ops->do_transaction_failed) {
1297        hwaddr physaddr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
1298
1299        cpu->cc->tcg_ops->do_transaction_failed(cpu, physaddr, addr, size,
1300                                                access_type, mmu_idx,
1301                                                full->attrs, response, retaddr);
1302    }
1303}
1304
1305/* Return true if ADDR is present in the victim tlb, and has been copied
1306   back to the main tlb.  */
1307static bool victim_tlb_hit(CPUState *cpu, size_t mmu_idx, size_t index,
1308                           MMUAccessType access_type, vaddr page)
1309{
1310    size_t vidx;
1311
1312    assert_cpu_is_self(cpu);
1313    for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1314        CPUTLBEntry *vtlb = &cpu->neg.tlb.d[mmu_idx].vtable[vidx];
1315        uint64_t cmp = tlb_read_idx(vtlb, access_type);
1316
1317        if (cmp == page) {
1318            /* Found entry in victim tlb, swap tlb and iotlb.  */
1319            CPUTLBEntry tmptlb, *tlb = &cpu->neg.tlb.f[mmu_idx].table[index];
1320
1321            qemu_spin_lock(&cpu->neg.tlb.c.lock);
1322            copy_tlb_helper_locked(&tmptlb, tlb);
1323            copy_tlb_helper_locked(tlb, vtlb);
1324            copy_tlb_helper_locked(vtlb, &tmptlb);
1325            qemu_spin_unlock(&cpu->neg.tlb.c.lock);
1326
1327            CPUTLBEntryFull *f1 = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
1328            CPUTLBEntryFull *f2 = &cpu->neg.tlb.d[mmu_idx].vfulltlb[vidx];
1329            CPUTLBEntryFull tmpf;
1330            tmpf = *f1; *f1 = *f2; *f2 = tmpf;
1331            return true;
1332        }
1333    }
1334    return false;
1335}
1336
1337static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1338                           CPUTLBEntryFull *full, uintptr_t retaddr)
1339{
1340    ram_addr_t ram_addr = mem_vaddr + full->xlat_section;
1341
1342    trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1343
1344    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1345        tb_invalidate_phys_range_fast(cpu, ram_addr, size, retaddr);
1346    }
1347
1348    /*
1349     * Set both VGA and migration bits for simplicity and to remove
1350     * the notdirty callback faster.
1351     */
1352    cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1353
1354    /* We remove the notdirty callback only if the code has been flushed. */
1355    if (!cpu_physical_memory_is_clean(ram_addr)) {
1356        trace_memory_notdirty_set_dirty(mem_vaddr);
1357        tlb_set_dirty(cpu, mem_vaddr);
1358    }
1359}
1360
1361static int probe_access_internal(CPUState *cpu, vaddr addr,
1362                                 int fault_size, MMUAccessType access_type,
1363                                 int mmu_idx, bool nonfault,
1364                                 void **phost, CPUTLBEntryFull **pfull,
1365                                 uintptr_t retaddr, bool check_mem_cbs)
1366{
1367    uintptr_t index = tlb_index(cpu, mmu_idx, addr);
1368    CPUTLBEntry *entry = tlb_entry(cpu, mmu_idx, addr);
1369    uint64_t tlb_addr = tlb_read_idx(entry, access_type);
1370    vaddr page_addr = addr & TARGET_PAGE_MASK;
1371    int flags = TLB_FLAGS_MASK & ~TLB_FORCE_SLOW;
1372    bool force_mmio = check_mem_cbs && cpu_plugin_mem_cbs_enabled(cpu);
1373    CPUTLBEntryFull *full;
1374
1375    if (!tlb_hit_page(tlb_addr, page_addr)) {
1376        if (!victim_tlb_hit(cpu, mmu_idx, index, access_type, page_addr)) {
1377            if (!tlb_fill_align(cpu, addr, access_type, mmu_idx,
1378                                0, fault_size, nonfault, retaddr)) {
1379                /* Non-faulting page table read failed.  */
1380                *phost = NULL;
1381                *pfull = NULL;
1382                return TLB_INVALID_MASK;
1383            }
1384
1385            /* TLB resize via tlb_fill_align may have moved the entry.  */
1386            index = tlb_index(cpu, mmu_idx, addr);
1387            entry = tlb_entry(cpu, mmu_idx, addr);
1388
1389            /*
1390             * With PAGE_WRITE_INV, we set TLB_INVALID_MASK immediately,
1391             * to force the next access through tlb_fill_align.  We've just
1392             * called tlb_fill_align, so we know that this entry *is* valid.
1393             */
1394            flags &= ~TLB_INVALID_MASK;
1395        }
1396        tlb_addr = tlb_read_idx(entry, access_type);
1397    }
1398    flags &= tlb_addr;
1399
1400    *pfull = full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
1401    flags |= full->slow_flags[access_type];
1402
1403    /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
1404    if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY | TLB_CHECK_ALIGNED))
1405        || (access_type != MMU_INST_FETCH && force_mmio)) {
1406        *phost = NULL;
1407        return TLB_MMIO;
1408    }
1409
1410    /* Everything else is RAM. */
1411    *phost = (void *)((uintptr_t)addr + entry->addend);
1412    return flags;
1413}
1414
1415int probe_access_full(CPUArchState *env, vaddr addr, int size,
1416                      MMUAccessType access_type, int mmu_idx,
1417                      bool nonfault, void **phost, CPUTLBEntryFull **pfull,
1418                      uintptr_t retaddr)
1419{
1420    int flags = probe_access_internal(env_cpu(env), addr, size, access_type,
1421                                      mmu_idx, nonfault, phost, pfull, retaddr,
1422                                      true);
1423
1424    /* Handle clean RAM pages.  */
1425    if (unlikely(flags & TLB_NOTDIRTY)) {
1426        int dirtysize = size == 0 ? 1 : size;
1427        notdirty_write(env_cpu(env), addr, dirtysize, *pfull, retaddr);
1428        flags &= ~TLB_NOTDIRTY;
1429    }
1430
1431    return flags;
1432}
1433
1434int probe_access_full_mmu(CPUArchState *env, vaddr addr, int size,
1435                          MMUAccessType access_type, int mmu_idx,
1436                          void **phost, CPUTLBEntryFull **pfull)
1437{
1438    void *discard_phost;
1439    CPUTLBEntryFull *discard_tlb;
1440
1441    /* privately handle users that don't need full results */
1442    phost = phost ? phost : &discard_phost;
1443    pfull = pfull ? pfull : &discard_tlb;
1444
1445    int flags = probe_access_internal(env_cpu(env), addr, size, access_type,
1446                                      mmu_idx, true, phost, pfull, 0, false);
1447
1448    /* Handle clean RAM pages.  */
1449    if (unlikely(flags & TLB_NOTDIRTY)) {
1450        int dirtysize = size == 0 ? 1 : size;
1451        notdirty_write(env_cpu(env), addr, dirtysize, *pfull, 0);
1452        flags &= ~TLB_NOTDIRTY;
1453    }
1454
1455    return flags;
1456}
1457
1458int probe_access_flags(CPUArchState *env, vaddr addr, int size,
1459                       MMUAccessType access_type, int mmu_idx,
1460                       bool nonfault, void **phost, uintptr_t retaddr)
1461{
1462    CPUTLBEntryFull *full;
1463    int flags;
1464
1465    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1466
1467    flags = probe_access_internal(env_cpu(env), addr, size, access_type,
1468                                  mmu_idx, nonfault, phost, &full, retaddr,
1469                                  true);
1470
1471    /* Handle clean RAM pages. */
1472    if (unlikely(flags & TLB_NOTDIRTY)) {
1473        int dirtysize = size == 0 ? 1 : size;
1474        notdirty_write(env_cpu(env), addr, dirtysize, full, retaddr);
1475        flags &= ~TLB_NOTDIRTY;
1476    }
1477
1478    return flags;
1479}
1480
1481void *probe_access(CPUArchState *env, vaddr addr, int size,
1482                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1483{
1484    CPUTLBEntryFull *full;
1485    void *host;
1486    int flags;
1487
1488    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1489
1490    flags = probe_access_internal(env_cpu(env), addr, size, access_type,
1491                                  mmu_idx, false, &host, &full, retaddr,
1492                                  true);
1493
1494    /* Per the interface, size == 0 merely faults the access. */
1495    if (size == 0) {
1496        return NULL;
1497    }
1498
1499    if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
1500        /* Handle watchpoints.  */
1501        if (flags & TLB_WATCHPOINT) {
1502            int wp_access = (access_type == MMU_DATA_STORE
1503                             ? BP_MEM_WRITE : BP_MEM_READ);
1504            cpu_check_watchpoint(env_cpu(env), addr, size,
1505                                 full->attrs, wp_access, retaddr);
1506        }
1507
1508        /* Handle clean RAM pages.  */
1509        if (flags & TLB_NOTDIRTY) {
1510            notdirty_write(env_cpu(env), addr, size, full, retaddr);
1511        }
1512    }
1513
1514    return host;
1515}
1516
1517void *tlb_vaddr_to_host(CPUArchState *env, vaddr addr,
1518                        MMUAccessType access_type, int mmu_idx)
1519{
1520    CPUTLBEntryFull *full;
1521    void *host;
1522    int flags;
1523
1524    flags = probe_access_internal(env_cpu(env), addr, 0, access_type,
1525                                  mmu_idx, true, &host, &full, 0, false);
1526
1527    /* No combination of flags are expected by the caller. */
1528    return flags ? NULL : host;
1529}
1530
1531/*
1532 * Return a ram_addr_t for the virtual address for execution.
1533 *
1534 * Return -1 if we can't translate and execute from an entire page
1535 * of RAM.  This will force us to execute by loading and translating
1536 * one insn at a time, without caching.
1537 *
1538 * NOTE: This function will trigger an exception if the page is
1539 * not executable.
1540 */
1541tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
1542                                        void **hostp)
1543{
1544    CPUTLBEntryFull *full;
1545    void *p;
1546
1547    (void)probe_access_internal(env_cpu(env), addr, 1, MMU_INST_FETCH,
1548                                cpu_mmu_index(env_cpu(env), true), false,
1549                                &p, &full, 0, false);
1550    if (p == NULL) {
1551        return -1;
1552    }
1553
1554    if (full->lg_page_size < TARGET_PAGE_BITS) {
1555        return -1;
1556    }
1557
1558    if (hostp) {
1559        *hostp = p;
1560    }
1561    return qemu_ram_addr_from_host_nofail(p);
1562}
1563
1564/* Load/store with atomicity primitives. */
1565#include "ldst_atomicity.c.inc"
1566
1567#ifdef CONFIG_PLUGIN
1568/*
1569 * Perform a TLB lookup and populate the qemu_plugin_hwaddr structure.
1570 * This should be a hot path as we will have just looked this path up
1571 * in the softmmu lookup code (or helper). We don't handle re-fills or
1572 * checking the victim table. This is purely informational.
1573 *
1574 * The one corner case is i/o write, which can cause changes to the
1575 * address space.  Those changes, and the corresponding tlb flush,
1576 * should be delayed until the next TB, so even then this ought not fail.
1577 * But check, Just in Case.
1578 */
1579bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int mmu_idx,
1580                       bool is_store, struct qemu_plugin_hwaddr *data)
1581{
1582    CPUTLBEntry *tlbe = tlb_entry(cpu, mmu_idx, addr);
1583    uintptr_t index = tlb_index(cpu, mmu_idx, addr);
1584    MMUAccessType access_type = is_store ? MMU_DATA_STORE : MMU_DATA_LOAD;
1585    uint64_t tlb_addr = tlb_read_idx(tlbe, access_type);
1586    CPUTLBEntryFull *full;
1587
1588    if (unlikely(!tlb_hit(tlb_addr, addr))) {
1589        return false;
1590    }
1591
1592    full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
1593    data->phys_addr = full->phys_addr | (addr & ~TARGET_PAGE_MASK);
1594
1595    /* We must have an iotlb entry for MMIO */
1596    if (tlb_addr & TLB_MMIO) {
1597        MemoryRegionSection *section =
1598            iotlb_to_section(cpu, full->xlat_section & ~TARGET_PAGE_MASK,
1599                             full->attrs);
1600        data->is_io = true;
1601        data->mr = section->mr;
1602    } else {
1603        data->is_io = false;
1604        data->mr = NULL;
1605    }
1606    return true;
1607}
1608#endif
1609
/*
 * Probe for a load/store operation.
 * The host address and the TLB flags found for each page accessed are
 * returned in the lookup structures below.
 */
1614
/*
 * Per-page state of one softmmu lookup.  mmu_lookup1() reads @addr and
 * @size and fills in @full, @haddr and @flags.
 */
typedef struct MMULookupPageData {
    CPUTLBEntryFull *full;  /* out: full TLB entry covering the page */
    void *haddr;            /* out: host address, computed speculatively;
                               depending on flags it might be invalid */
    vaddr addr;             /* in: guest virtual address to translate */
    int flags;              /* out: TLB flags, without TLB_FORCE_SLOW,
                               merged with the entry's slow flags */
    int size;               /* in: access size for this page */
} MMULookupPageData;
1622
/*
 * Result of mmu_lookup(): room for the two pages an access may touch,
 * plus the decoded parts of the MemOpIdx.
 */
typedef struct MMULookupLocals {
    MMULookupPageData page[2];  /* [0] starts at the access address,
                                   [1] at the page of its last byte */
    MemOp memop;                /* from get_memop(oi) */
    int mmu_idx;                /* from get_mmuidx(oi) */
} MMULookupLocals;
1628
1629/**
1630 * mmu_lookup1: translate one page
1631 * @cpu: generic cpu state
1632 * @data: lookup parameters
1633 * @memop: memory operation for the access, or 0
1634 * @mmu_idx: virtual address context
1635 * @access_type: load/store/code
1636 * @ra: return address into tcg generated code, or 0
1637 *
1638 * Resolve the translation for the one page at @data.addr, filling in
1639 * the rest of @data with the results.  If the translation fails,
1640 * tlb_fill_align will longjmp out.  Return true if the softmmu tlb for
1641 * @mmu_idx may have resized.
1642 */
1643static bool mmu_lookup1(CPUState *cpu, MMULookupPageData *data, MemOp memop,
1644                        int mmu_idx, MMUAccessType access_type, uintptr_t ra)
1645{
1646    vaddr addr = data->addr;
1647    uintptr_t index = tlb_index(cpu, mmu_idx, addr);
1648    CPUTLBEntry *entry = tlb_entry(cpu, mmu_idx, addr);
1649    uint64_t tlb_addr = tlb_read_idx(entry, access_type);
1650    bool maybe_resized = false;
1651    CPUTLBEntryFull *full;
1652    int flags;
1653
1654    /* If the TLB entry is for a different page, reload and try again.  */
1655    if (!tlb_hit(tlb_addr, addr)) {
1656        if (!victim_tlb_hit(cpu, mmu_idx, index, access_type,
1657                            addr & TARGET_PAGE_MASK)) {
1658            tlb_fill_align(cpu, addr, access_type, mmu_idx,
1659                           memop, data->size, false, ra);
1660            maybe_resized = true;
1661            index = tlb_index(cpu, mmu_idx, addr);
1662            entry = tlb_entry(cpu, mmu_idx, addr);
1663        }
1664        tlb_addr = tlb_read_idx(entry, access_type) & ~TLB_INVALID_MASK;
1665    }
1666
1667    full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
1668    flags = tlb_addr & (TLB_FLAGS_MASK & ~TLB_FORCE_SLOW);
1669    flags |= full->slow_flags[access_type];
1670
1671    if (likely(!maybe_resized)) {
1672        /* Alignment has not been checked by tlb_fill_align. */
1673        int a_bits = memop_alignment_bits(memop);
1674
1675        /*
1676         * This alignment check differs from the one above, in that this is
1677         * based on the atomicity of the operation. The intended use case is
1678         * the ARM memory type field of each PTE, where access to pages with
1679         * Device memory type require alignment.
1680         */
1681        if (unlikely(flags & TLB_CHECK_ALIGNED)) {
1682            int at_bits = memop_atomicity_bits(memop);
1683            a_bits = MAX(a_bits, at_bits);
1684        }
1685        if (unlikely(addr & ((1 << a_bits) - 1))) {
1686            cpu_unaligned_access(cpu, addr, access_type, mmu_idx, ra);
1687        }
1688    }
1689
1690    data->full = full;
1691    data->flags = flags;
1692    /* Compute haddr speculatively; depending on flags it might be invalid. */
1693    data->haddr = (void *)((uintptr_t)addr + entry->addend);
1694
1695    return maybe_resized;
1696}
1697
1698/**
1699 * mmu_watch_or_dirty
1700 * @cpu: generic cpu state
1701 * @data: lookup parameters
1702 * @access_type: load/store/code
1703 * @ra: return address into tcg generated code, or 0
1704 *
1705 * Trigger watchpoints for @data.addr:@data.size;
1706 * record writes to protected clean pages.
1707 */
1708static void mmu_watch_or_dirty(CPUState *cpu, MMULookupPageData *data,
1709                               MMUAccessType access_type, uintptr_t ra)
1710{
1711    CPUTLBEntryFull *full = data->full;
1712    vaddr addr = data->addr;
1713    int flags = data->flags;
1714    int size = data->size;
1715
1716    /* On watchpoint hit, this will longjmp out.  */
1717    if (flags & TLB_WATCHPOINT) {
1718        int wp = access_type == MMU_DATA_STORE ? BP_MEM_WRITE : BP_MEM_READ;
1719        cpu_check_watchpoint(cpu, addr, size, full->attrs, wp, ra);
1720        flags &= ~TLB_WATCHPOINT;
1721    }
1722
1723    /* Note that notdirty is only set for writes. */
1724    if (flags & TLB_NOTDIRTY) {
1725        notdirty_write(cpu, addr, size, full, ra);
1726        flags &= ~TLB_NOTDIRTY;
1727    }
1728    data->flags = flags;
1729}
1730
1731/**
1732 * mmu_lookup: translate page(s)
1733 * @cpu: generic cpu state
1734 * @addr: virtual address
1735 * @oi: combined mmu_idx and MemOp
1736 * @ra: return address into tcg generated code, or 0
1737 * @access_type: load/store/code
1738 * @l: output result
1739 *
1740 * Resolve the translation for the page(s) beginning at @addr, for MemOp.size
1741 * bytes.  Return true if the lookup crosses a page boundary.
1742 */
1743static bool mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1744                       uintptr_t ra, MMUAccessType type, MMULookupLocals *l)
1745{
1746    bool crosspage;
1747    int flags;
1748
1749    l->memop = get_memop(oi);
1750    l->mmu_idx = get_mmuidx(oi);
1751
1752    tcg_debug_assert(l->mmu_idx < NB_MMU_MODES);
1753
1754    l->page[0].addr = addr;
1755    l->page[0].size = memop_size(l->memop);
1756    l->page[1].addr = (addr + l->page[0].size - 1) & TARGET_PAGE_MASK;
1757    l->page[1].size = 0;
1758    crosspage = (addr ^ l->page[1].addr) & TARGET_PAGE_MASK;
1759
1760    if (likely(!crosspage)) {
1761        mmu_lookup1(cpu, &l->page[0], l->memop, l->mmu_idx, type, ra);
1762
1763        flags = l->page[0].flags;
1764        if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
1765            mmu_watch_or_dirty(cpu, &l->page[0], type, ra);
1766        }
1767        if (unlikely(flags & TLB_BSWAP)) {
1768            l->memop ^= MO_BSWAP;
1769        }
1770    } else {
1771        /* Finish compute of page crossing. */
1772        int size0 = l->page[1].addr - addr;
1773        l->page[1].size = l->page[0].size - size0;
1774        l->page[0].size = size0;
1775
1776        l->page[1].addr = cpu->cc->tcg_ops->pointer_wrap(cpu, l->mmu_idx,
1777                                                         l->page[1].addr, addr);
1778
1779        /*
1780         * Lookup both pages, recognizing exceptions from either.  If the
1781         * second lookup potentially resized, refresh first CPUTLBEntryFull.
1782         */
1783        mmu_lookup1(cpu, &l->page[0], l->memop, l->mmu_idx, type, ra);
1784        if (mmu_lookup1(cpu, &l->page[1], 0, l->mmu_idx, type, ra)) {
1785            uintptr_t index = tlb_index(cpu, l->mmu_idx, addr);
1786            l->page[0].full = &cpu->neg.tlb.d[l->mmu_idx].fulltlb[index];
1787        }
1788
1789        flags = l->page[0].flags | l->page[1].flags;
1790        if (unlikely(flags & (TLB_WATCHPOINT | TLB_NOTDIRTY))) {
1791            mmu_watch_or_dirty(cpu, &l->page[0], type, ra);
1792            mmu_watch_or_dirty(cpu, &l->page[1], type, ra);
1793        }
1794
1795        /*
1796         * Since target/sparc is the only user of TLB_BSWAP, and all
1797         * Sparc accesses are aligned, any treatment across two pages
1798         * would be arbitrary.  Refuse it until there's a use.
1799         */
1800        tcg_debug_assert((flags & TLB_BSWAP) == 0);
1801    }
1802
1803    return crosspage;
1804}
1805
1806/*
1807 * Probe for an atomic operation.  Do not allow unaligned operations,
1808 * or io operations to proceed.  Return the host address.
1809 */
1810static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1811                               int size, uintptr_t retaddr)
1812{
1813    uintptr_t mmu_idx = get_mmuidx(oi);
1814    MemOp mop = get_memop(oi);
1815    uintptr_t index;
1816    CPUTLBEntry *tlbe;
1817    vaddr tlb_addr;
1818    void *hostaddr;
1819    CPUTLBEntryFull *full;
1820    bool did_tlb_fill = false;
1821
1822    tcg_debug_assert(mmu_idx < NB_MMU_MODES);
1823
1824    /* Adjust the given return address.  */
1825    retaddr -= GETPC_ADJ;
1826
1827    index = tlb_index(cpu, mmu_idx, addr);
1828    tlbe = tlb_entry(cpu, mmu_idx, addr);
1829
1830    /* Check TLB entry and enforce page permissions.  */
1831    tlb_addr = tlb_addr_write(tlbe);
1832    if (!tlb_hit(tlb_addr, addr)) {
1833        if (!victim_tlb_hit(cpu, mmu_idx, index, MMU_DATA_STORE,
1834                            addr & TARGET_PAGE_MASK)) {
1835            tlb_fill_align(cpu, addr, MMU_DATA_STORE, mmu_idx,
1836                           mop, size, false, retaddr);
1837            did_tlb_fill = true;
1838            index = tlb_index(cpu, mmu_idx, addr);
1839            tlbe = tlb_entry(cpu, mmu_idx, addr);
1840        }
1841        tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1842    }
1843
1844    /*
1845     * Let the guest notice RMW on a write-only page.
1846     * We have just verified that the page is writable.
1847     * Subpage lookups may have left TLB_INVALID_MASK set,
1848     * but addr_read will only be -1 if PAGE_READ was unset.
1849     */
1850    if (unlikely(tlbe->addr_read == -1)) {
1851        tlb_fill_align(cpu, addr, MMU_DATA_LOAD, mmu_idx,
1852                       0, size, false, retaddr);
1853        /*
1854         * Since we don't support reads and writes to different
1855         * addresses, and we do have the proper page loaded for
1856         * write, this shouldn't ever return.
1857         */
1858        g_assert_not_reached();
1859    }
1860
1861    /* Enforce guest required alignment, if not handled by tlb_fill_align. */
1862    if (!did_tlb_fill && (addr & ((1 << memop_alignment_bits(mop)) - 1))) {
1863        cpu_unaligned_access(cpu, addr, MMU_DATA_STORE, mmu_idx, retaddr);
1864    }
1865
1866    /* Enforce qemu required alignment.  */
1867    if (unlikely(addr & (size - 1))) {
1868        /*
1869         * We get here if guest alignment was not requested, or was not
1870         * enforced by cpu_unaligned_access or tlb_fill_align above.
1871         * We might widen the access and emulate, but for now
1872         * mark an exception and exit the cpu loop.
1873         */
1874        goto stop_the_world;
1875    }
1876
1877    /* Finish collecting tlb flags for both read and write. */
1878    full = &cpu->neg.tlb.d[mmu_idx].fulltlb[index];
1879    tlb_addr |= tlbe->addr_read;
1880    tlb_addr &= TLB_FLAGS_MASK & ~TLB_FORCE_SLOW;
1881    tlb_addr |= full->slow_flags[MMU_DATA_STORE];
1882    tlb_addr |= full->slow_flags[MMU_DATA_LOAD];
1883
1884    /* Notice an IO access or a needs-MMU-lookup access */
1885    if (unlikely(tlb_addr & (TLB_MMIO | TLB_DISCARD_WRITE))) {
1886        /* There's really nothing that can be done to
1887           support this apart from stop-the-world.  */
1888        goto stop_the_world;
1889    }
1890
1891    hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1892
1893    if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1894        notdirty_write(cpu, addr, size, full, retaddr);
1895    }
1896
1897    if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1898        int wp_flags = 0;
1899
1900        if (full->slow_flags[MMU_DATA_STORE] & TLB_WATCHPOINT) {
1901            wp_flags |= BP_MEM_WRITE;
1902        }
1903        if (full->slow_flags[MMU_DATA_LOAD] & TLB_WATCHPOINT) {
1904            wp_flags |= BP_MEM_READ;
1905        }
1906        cpu_check_watchpoint(cpu, addr, size,
1907                             full->attrs, wp_flags, retaddr);
1908    }
1909
1910    return hostaddr;
1911
1912 stop_the_world:
1913    cpu_loop_exit_atomic(cpu, retaddr);
1914}
1915
1916/*
1917 * Load Helpers
1918 *
1919 * We support two different access types. SOFTMMU_CODE_ACCESS is
1920 * specifically for reading instructions from system memory. It is
1921 * called by the translation loop and in some helpers where the code
1922 * is disassembled. It shouldn't be called directly by guest code.
1923 *
1924 * For the benefit of TCG generated code, we want to avoid the
1925 * complication of ABI-specific return type promotion and always
1926 * return a value extended to the register size of the host. This is
1927 * tcg_target_long, except in the case of a 32-bit host and 64-bit
1928 * data, and for that we always have uint64_t.
1929 *
1930 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1931 */
1932
1933/**
1934 * do_ld_mmio_beN:
1935 * @cpu: generic cpu state
1936 * @full: page parameters
1937 * @ret_be: accumulated data
1938 * @addr: virtual address
1939 * @size: number of bytes
1940 * @mmu_idx: virtual address context
1941 * @ra: return address into tcg generated code, or 0
1942 * Context: BQL held
1943 *
1944 * Load @size bytes from @addr, which is memory-mapped i/o.
1945 * The bytes are concatenated in big-endian order with @ret_be.
1946 */
1947static uint64_t int_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
1948                                uint64_t ret_be, vaddr addr, int size,
1949                                int mmu_idx, MMUAccessType type, uintptr_t ra,
1950                                MemoryRegion *mr, hwaddr mr_offset)
1951{
1952    do {
1953        MemOp this_mop;
1954        unsigned this_size;
1955        uint64_t val;
1956        MemTxResult r;
1957
1958        /* Read aligned pieces up to 8 bytes. */
1959        this_mop = ctz32(size | (int)addr | 8);
1960        this_size = 1 << this_mop;
1961        this_mop |= MO_BE;
1962
1963        r = memory_region_dispatch_read(mr, mr_offset, &val,
1964                                        this_mop, full->attrs);
1965        if (unlikely(r != MEMTX_OK)) {
1966            io_failed(cpu, full, addr, this_size, type, mmu_idx, r, ra);
1967        }
1968        if (this_size == 8) {
1969            return val;
1970        }
1971
1972        ret_be = (ret_be << (this_size * 8)) | val;
1973        addr += this_size;
1974        mr_offset += this_size;
1975        size -= this_size;
1976    } while (size);
1977
1978    return ret_be;
1979}
1980
1981static uint64_t do_ld_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
1982                               uint64_t ret_be, vaddr addr, int size,
1983                               int mmu_idx, MMUAccessType type, uintptr_t ra)
1984{
1985    MemoryRegionSection *section;
1986    MemoryRegion *mr;
1987    hwaddr mr_offset;
1988    MemTxAttrs attrs;
1989
1990    tcg_debug_assert(size > 0 && size <= 8);
1991
1992    attrs = full->attrs;
1993    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
1994    mr = section->mr;
1995
1996    BQL_LOCK_GUARD();
1997    return int_ld_mmio_beN(cpu, full, ret_be, addr, size, mmu_idx,
1998                           type, ra, mr, mr_offset);
1999}
2000
2001static Int128 do_ld16_mmio_beN(CPUState *cpu, CPUTLBEntryFull *full,
2002                               uint64_t ret_be, vaddr addr, int size,
2003                               int mmu_idx, uintptr_t ra)
2004{
2005    MemoryRegionSection *section;
2006    MemoryRegion *mr;
2007    hwaddr mr_offset;
2008    MemTxAttrs attrs;
2009    uint64_t a, b;
2010
2011    tcg_debug_assert(size > 8 && size <= 16);
2012
2013    attrs = full->attrs;
2014    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
2015    mr = section->mr;
2016
2017    BQL_LOCK_GUARD();
2018    a = int_ld_mmio_beN(cpu, full, ret_be, addr, size - 8, mmu_idx,
2019                        MMU_DATA_LOAD, ra, mr, mr_offset);
2020    b = int_ld_mmio_beN(cpu, full, ret_be, addr + size - 8, 8, mmu_idx,
2021                        MMU_DATA_LOAD, ra, mr, mr_offset + size - 8);
2022    return int128_make128(b, a);
2023}
2024
2025/**
2026 * do_ld_bytes_beN
2027 * @p: translation parameters
2028 * @ret_be: accumulated data
2029 *
2030 * Load @p->size bytes from @p->haddr, which is RAM.
2031 * The bytes to concatenated in big-endian order with @ret_be.
2032 */
2033static uint64_t do_ld_bytes_beN(MMULookupPageData *p, uint64_t ret_be)
2034{
2035    uint8_t *haddr = p->haddr;
2036    int i, size = p->size;
2037
2038    for (i = 0; i < size; i++) {
2039        ret_be = (ret_be << 8) | haddr[i];
2040    }
2041    return ret_be;
2042}
2043
2044/**
2045 * do_ld_parts_beN
2046 * @p: translation parameters
2047 * @ret_be: accumulated data
2048 *
2049 * As do_ld_bytes_beN, but atomically on each aligned part.
2050 */
2051static uint64_t do_ld_parts_beN(MMULookupPageData *p, uint64_t ret_be)
2052{
2053    void *haddr = p->haddr;
2054    int size = p->size;
2055
2056    do {
2057        uint64_t x;
2058        int n;
2059
2060        /*
2061         * Find minimum of alignment and size.
2062         * This is slightly stronger than required by MO_ATOM_SUBALIGN, which
2063         * would have only checked the low bits of addr|size once at the start,
2064         * but is just as easy.
2065         */
2066        switch (((uintptr_t)haddr | size) & 7) {
2067        case 4:
2068            x = cpu_to_be32(load_atomic4(haddr));
2069            ret_be = (ret_be << 32) | x;
2070            n = 4;
2071            break;
2072        case 2:
2073        case 6:
2074            x = cpu_to_be16(load_atomic2(haddr));
2075            ret_be = (ret_be << 16) | x;
2076            n = 2;
2077            break;
2078        default:
2079            x = *(uint8_t *)haddr;
2080            ret_be = (ret_be << 8) | x;
2081            n = 1;
2082            break;
2083        case 0:
2084            g_assert_not_reached();
2085        }
2086        haddr += n;
2087        size -= n;
2088    } while (size != 0);
2089    return ret_be;
2090}
2091
2092/**
2093 * do_ld_parts_be4
2094 * @p: translation parameters
2095 * @ret_be: accumulated data
2096 *
2097 * As do_ld_bytes_beN, but with one atomic load.
2098 * Four aligned bytes are guaranteed to cover the load.
2099 */
2100static uint64_t do_ld_whole_be4(MMULookupPageData *p, uint64_t ret_be)
2101{
2102    int o = p->addr & 3;
2103    uint32_t x = load_atomic4(p->haddr - o);
2104
2105    x = cpu_to_be32(x);
2106    x <<= o * 8;
2107    x >>= (4 - p->size) * 8;
2108    return (ret_be << (p->size * 8)) | x;
2109}
2110
2111/**
2112 * do_ld_parts_be8
2113 * @p: translation parameters
2114 * @ret_be: accumulated data
2115 *
2116 * As do_ld_bytes_beN, but with one atomic load.
2117 * Eight aligned bytes are guaranteed to cover the load.
2118 */
2119static uint64_t do_ld_whole_be8(CPUState *cpu, uintptr_t ra,
2120                                MMULookupPageData *p, uint64_t ret_be)
2121{
2122    int o = p->addr & 7;
2123    uint64_t x = load_atomic8_or_exit(cpu, ra, p->haddr - o);
2124
2125    x = cpu_to_be64(x);
2126    x <<= o * 8;
2127    x >>= (8 - p->size) * 8;
2128    return (ret_be << (p->size * 8)) | x;
2129}
2130
2131/**
2132 * do_ld_parts_be16
2133 * @p: translation parameters
2134 * @ret_be: accumulated data
2135 *
2136 * As do_ld_bytes_beN, but with one atomic load.
2137 * 16 aligned bytes are guaranteed to cover the load.
2138 */
2139static Int128 do_ld_whole_be16(CPUState *cpu, uintptr_t ra,
2140                               MMULookupPageData *p, uint64_t ret_be)
2141{
2142    int o = p->addr & 15;
2143    Int128 x, y = load_atomic16_or_exit(cpu, ra, p->haddr - o);
2144    int size = p->size;
2145
2146    if (!HOST_BIG_ENDIAN) {
2147        y = bswap128(y);
2148    }
2149    y = int128_lshift(y, o * 8);
2150    y = int128_urshift(y, (16 - size) * 8);
2151    x = int128_make64(ret_be);
2152    x = int128_lshift(x, size * 8);
2153    return int128_or(x, y);
2154}
2155
2156/*
2157 * Wrapper for the above.
2158 */
2159static uint64_t do_ld_beN(CPUState *cpu, MMULookupPageData *p,
2160                          uint64_t ret_be, int mmu_idx, MMUAccessType type,
2161                          MemOp mop, uintptr_t ra)
2162{
2163    MemOp atom;
2164    unsigned tmp, half_size;
2165
2166    if (unlikely(p->flags & TLB_MMIO)) {
2167        return do_ld_mmio_beN(cpu, p->full, ret_be, p->addr, p->size,
2168                              mmu_idx, type, ra);
2169    }
2170
2171    /*
2172     * It is a given that we cross a page and therefore there is no
2173     * atomicity for the load as a whole, but subobjects may need attention.
2174     */
2175    atom = mop & MO_ATOM_MASK;
2176    switch (atom) {
2177    case MO_ATOM_SUBALIGN:
2178        return do_ld_parts_beN(p, ret_be);
2179
2180    case MO_ATOM_IFALIGN_PAIR:
2181    case MO_ATOM_WITHIN16_PAIR:
2182        tmp = mop & MO_SIZE;
2183        tmp = tmp ? tmp - 1 : 0;
2184        half_size = 1 << tmp;
2185        if (atom == MO_ATOM_IFALIGN_PAIR
2186            ? p->size == half_size
2187            : p->size >= half_size) {
2188            if (!HAVE_al8_fast && p->size < 4) {
2189                return do_ld_whole_be4(p, ret_be);
2190            } else {
2191                return do_ld_whole_be8(cpu, ra, p, ret_be);
2192            }
2193        }
2194        /* fall through */
2195
2196    case MO_ATOM_IFALIGN:
2197    case MO_ATOM_WITHIN16:
2198    case MO_ATOM_NONE:
2199        return do_ld_bytes_beN(p, ret_be);
2200
2201    default:
2202        g_assert_not_reached();
2203    }
2204}
2205
2206/*
2207 * Wrapper for the above, for 8 < size < 16.
2208 */
2209static Int128 do_ld16_beN(CPUState *cpu, MMULookupPageData *p,
2210                          uint64_t a, int mmu_idx, MemOp mop, uintptr_t ra)
2211{
2212    int size = p->size;
2213    uint64_t b;
2214    MemOp atom;
2215
2216    if (unlikely(p->flags & TLB_MMIO)) {
2217        return do_ld16_mmio_beN(cpu, p->full, a, p->addr, size, mmu_idx, ra);
2218    }
2219
2220    /*
2221     * It is a given that we cross a page and therefore there is no
2222     * atomicity for the load as a whole, but subobjects may need attention.
2223     */
2224    atom = mop & MO_ATOM_MASK;
2225    switch (atom) {
2226    case MO_ATOM_SUBALIGN:
2227        p->size = size - 8;
2228        a = do_ld_parts_beN(p, a);
2229        p->haddr += size - 8;
2230        p->size = 8;
2231        b = do_ld_parts_beN(p, 0);
2232        break;
2233
2234    case MO_ATOM_WITHIN16_PAIR:
2235        /* Since size > 8, this is the half that must be atomic. */
2236        return do_ld_whole_be16(cpu, ra, p, a);
2237
2238    case MO_ATOM_IFALIGN_PAIR:
2239        /*
2240         * Since size > 8, both halves are misaligned,
2241         * and so neither is atomic.
2242         */
2243    case MO_ATOM_IFALIGN:
2244    case MO_ATOM_WITHIN16:
2245    case MO_ATOM_NONE:
2246        p->size = size - 8;
2247        a = do_ld_bytes_beN(p, a);
2248        b = ldq_be_p(p->haddr + size - 8);
2249        break;
2250
2251    default:
2252        g_assert_not_reached();
2253    }
2254
2255    return int128_make128(b, a);
2256}
2257
2258static uint8_t do_ld_1(CPUState *cpu, MMULookupPageData *p, int mmu_idx,
2259                       MMUAccessType type, uintptr_t ra)
2260{
2261    if (unlikely(p->flags & TLB_MMIO)) {
2262        return do_ld_mmio_beN(cpu, p->full, 0, p->addr, 1, mmu_idx, type, ra);
2263    } else {
2264        return *(uint8_t *)p->haddr;
2265    }
2266}
2267
2268static uint16_t do_ld_2(CPUState *cpu, MMULookupPageData *p, int mmu_idx,
2269                        MMUAccessType type, MemOp memop, uintptr_t ra)
2270{
2271    uint16_t ret;
2272
2273    if (unlikely(p->flags & TLB_MMIO)) {
2274        ret = do_ld_mmio_beN(cpu, p->full, 0, p->addr, 2, mmu_idx, type, ra);
2275        if ((memop & MO_BSWAP) == MO_LE) {
2276            ret = bswap16(ret);
2277        }
2278    } else {
2279        /* Perform the load host endian, then swap if necessary. */
2280        ret = load_atom_2(cpu, ra, p->haddr, memop);
2281        if (memop & MO_BSWAP) {
2282            ret = bswap16(ret);
2283        }
2284    }
2285    return ret;
2286}
2287
2288static uint32_t do_ld_4(CPUState *cpu, MMULookupPageData *p, int mmu_idx,
2289                        MMUAccessType type, MemOp memop, uintptr_t ra)
2290{
2291    uint32_t ret;
2292
2293    if (unlikely(p->flags & TLB_MMIO)) {
2294        ret = do_ld_mmio_beN(cpu, p->full, 0, p->addr, 4, mmu_idx, type, ra);
2295        if ((memop & MO_BSWAP) == MO_LE) {
2296            ret = bswap32(ret);
2297        }
2298    } else {
2299        /* Perform the load host endian. */
2300        ret = load_atom_4(cpu, ra, p->haddr, memop);
2301        if (memop & MO_BSWAP) {
2302            ret = bswap32(ret);
2303        }
2304    }
2305    return ret;
2306}
2307
2308static uint64_t do_ld_8(CPUState *cpu, MMULookupPageData *p, int mmu_idx,
2309                        MMUAccessType type, MemOp memop, uintptr_t ra)
2310{
2311    uint64_t ret;
2312
2313    if (unlikely(p->flags & TLB_MMIO)) {
2314        ret = do_ld_mmio_beN(cpu, p->full, 0, p->addr, 8, mmu_idx, type, ra);
2315        if ((memop & MO_BSWAP) == MO_LE) {
2316            ret = bswap64(ret);
2317        }
2318    } else {
2319        /* Perform the load host endian. */
2320        ret = load_atom_8(cpu, ra, p->haddr, memop);
2321        if (memop & MO_BSWAP) {
2322            ret = bswap64(ret);
2323        }
2324    }
2325    return ret;
2326}
2327
2328static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
2329                          uintptr_t ra, MMUAccessType access_type)
2330{
2331    MMULookupLocals l;
2332    bool crosspage;
2333
2334    cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
2335    crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
2336    tcg_debug_assert(!crosspage);
2337
2338    return do_ld_1(cpu, &l.page[0], l.mmu_idx, access_type, ra);
2339}
2340
2341static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
2342                           uintptr_t ra, MMUAccessType access_type)
2343{
2344    MMULookupLocals l;
2345    bool crosspage;
2346    uint16_t ret;
2347    uint8_t a, b;
2348
2349    cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
2350    crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
2351    if (likely(!crosspage)) {
2352        return do_ld_2(cpu, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
2353    }
2354
2355    a = do_ld_1(cpu, &l.page[0], l.mmu_idx, access_type, ra);
2356    b = do_ld_1(cpu, &l.page[1], l.mmu_idx, access_type, ra);
2357
2358    if ((l.memop & MO_BSWAP) == MO_LE) {
2359        ret = a | (b << 8);
2360    } else {
2361        ret = b | (a << 8);
2362    }
2363    return ret;
2364}
2365
2366static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
2367                           uintptr_t ra, MMUAccessType access_type)
2368{
2369    MMULookupLocals l;
2370    bool crosspage;
2371    uint32_t ret;
2372
2373    cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
2374    crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
2375    if (likely(!crosspage)) {
2376        return do_ld_4(cpu, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
2377    }
2378
2379    ret = do_ld_beN(cpu, &l.page[0], 0, l.mmu_idx, access_type, l.memop, ra);
2380    ret = do_ld_beN(cpu, &l.page[1], ret, l.mmu_idx, access_type, l.memop, ra);
2381    if ((l.memop & MO_BSWAP) == MO_LE) {
2382        ret = bswap32(ret);
2383    }
2384    return ret;
2385}
2386
2387static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
2388                           uintptr_t ra, MMUAccessType access_type)
2389{
2390    MMULookupLocals l;
2391    bool crosspage;
2392    uint64_t ret;
2393
2394    cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
2395    crosspage = mmu_lookup(cpu, addr, oi, ra, access_type, &l);
2396    if (likely(!crosspage)) {
2397        return do_ld_8(cpu, &l.page[0], l.mmu_idx, access_type, l.memop, ra);
2398    }
2399
2400    ret = do_ld_beN(cpu, &l.page[0], 0, l.mmu_idx, access_type, l.memop, ra);
2401    ret = do_ld_beN(cpu, &l.page[1], ret, l.mmu_idx, access_type, l.memop, ra);
2402    if ((l.memop & MO_BSWAP) == MO_LE) {
2403        ret = bswap64(ret);
2404    }
2405    return ret;
2406}
2407
2408static Int128 do_ld16_mmu(CPUState *cpu, vaddr addr,
2409                          MemOpIdx oi, uintptr_t ra)
2410{
2411    MMULookupLocals l;
2412    bool crosspage;
2413    uint64_t a, b;
2414    Int128 ret;
2415    int first;
2416
2417    cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
2418    crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_LOAD, &l);
2419    if (likely(!crosspage)) {
2420        if (unlikely(l.page[0].flags & TLB_MMIO)) {
2421            ret = do_ld16_mmio_beN(cpu, l.page[0].full, 0, addr, 16,
2422                                   l.mmu_idx, ra);
2423            if ((l.memop & MO_BSWAP) == MO_LE) {
2424                ret = bswap128(ret);
2425            }
2426        } else {
2427            /* Perform the load host endian. */
2428            ret = load_atom_16(cpu, ra, l.page[0].haddr, l.memop);
2429            if (l.memop & MO_BSWAP) {
2430                ret = bswap128(ret);
2431            }
2432        }
2433        return ret;
2434    }
2435
2436    first = l.page[0].size;
2437    if (first == 8) {
2438        MemOp mop8 = (l.memop & ~MO_SIZE) | MO_64;
2439
2440        a = do_ld_8(cpu, &l.page[0], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
2441        b = do_ld_8(cpu, &l.page[1], l.mmu_idx, MMU_DATA_LOAD, mop8, ra);
2442        if ((mop8 & MO_BSWAP) == MO_LE) {
2443            ret = int128_make128(a, b);
2444        } else {
2445            ret = int128_make128(b, a);
2446        }
2447        return ret;
2448    }
2449
2450    if (first < 8) {
2451        a = do_ld_beN(cpu, &l.page[0], 0, l.mmu_idx,
2452                      MMU_DATA_LOAD, l.memop, ra);
2453        ret = do_ld16_beN(cpu, &l.page[1], a, l.mmu_idx, l.memop, ra);
2454    } else {
2455        ret = do_ld16_beN(cpu, &l.page[0], 0, l.mmu_idx, l.memop, ra);
2456        b = int128_getlo(ret);
2457        ret = int128_lshift(ret, l.page[1].size * 8);
2458        a = int128_gethi(ret);
2459        b = do_ld_beN(cpu, &l.page[1], b, l.mmu_idx,
2460                      MMU_DATA_LOAD, l.memop, ra);
2461        ret = int128_make128(b, a);
2462    }
2463    if ((l.memop & MO_BSWAP) == MO_LE) {
2464        ret = bswap128(ret);
2465    }
2466    return ret;
2467}
2468
2469/*
2470 * Store Helpers
2471 */
2472
2473/**
2474 * do_st_mmio_leN:
2475 * @cpu: generic cpu state
2476 * @full: page parameters
2477 * @val_le: data to store
2478 * @addr: virtual address
2479 * @size: number of bytes
2480 * @mmu_idx: virtual address context
2481 * @ra: return address into tcg generated code, or 0
2482 * Context: BQL held
2483 *
2484 * Store @size bytes at @addr, which is memory-mapped i/o.
2485 * The bytes to store are extracted in little-endian order from @val_le;
2486 * return the bytes of @val_le beyond @p->size that have not been stored.
2487 */
2488static uint64_t int_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
2489                                uint64_t val_le, vaddr addr, int size,
2490                                int mmu_idx, uintptr_t ra,
2491                                MemoryRegion *mr, hwaddr mr_offset)
2492{
2493    do {
2494        MemOp this_mop;
2495        unsigned this_size;
2496        MemTxResult r;
2497
2498        /* Store aligned pieces up to 8 bytes. */
2499        this_mop = ctz32(size | (int)addr | 8);
2500        this_size = 1 << this_mop;
2501        this_mop |= MO_LE;
2502
2503        r = memory_region_dispatch_write(mr, mr_offset, val_le,
2504                                         this_mop, full->attrs);
2505        if (unlikely(r != MEMTX_OK)) {
2506            io_failed(cpu, full, addr, this_size, MMU_DATA_STORE,
2507                      mmu_idx, r, ra);
2508        }
2509        if (this_size == 8) {
2510            return 0;
2511        }
2512
2513        val_le >>= this_size * 8;
2514        addr += this_size;
2515        mr_offset += this_size;
2516        size -= this_size;
2517    } while (size);
2518
2519    return val_le;
2520}
2521
2522static uint64_t do_st_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
2523                               uint64_t val_le, vaddr addr, int size,
2524                               int mmu_idx, uintptr_t ra)
2525{
2526    MemoryRegionSection *section;
2527    hwaddr mr_offset;
2528    MemoryRegion *mr;
2529    MemTxAttrs attrs;
2530
2531    tcg_debug_assert(size > 0 && size <= 8);
2532
2533    attrs = full->attrs;
2534    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
2535    mr = section->mr;
2536
2537    BQL_LOCK_GUARD();
2538    return int_st_mmio_leN(cpu, full, val_le, addr, size, mmu_idx,
2539                           ra, mr, mr_offset);
2540}
2541
2542static uint64_t do_st16_mmio_leN(CPUState *cpu, CPUTLBEntryFull *full,
2543                                 Int128 val_le, vaddr addr, int size,
2544                                 int mmu_idx, uintptr_t ra)
2545{
2546    MemoryRegionSection *section;
2547    MemoryRegion *mr;
2548    hwaddr mr_offset;
2549    MemTxAttrs attrs;
2550
2551    tcg_debug_assert(size > 8 && size <= 16);
2552
2553    attrs = full->attrs;
2554    section = io_prepare(&mr_offset, cpu, full->xlat_section, attrs, addr, ra);
2555    mr = section->mr;
2556
2557    BQL_LOCK_GUARD();
2558    int_st_mmio_leN(cpu, full, int128_getlo(val_le), addr, 8,
2559                    mmu_idx, ra, mr, mr_offset);
2560    return int_st_mmio_leN(cpu, full, int128_gethi(val_le), addr + 8,
2561                           size - 8, mmu_idx, ra, mr, mr_offset + 8);
2562}
2563
2564/*
2565 * Wrapper for the above.
2566 */
2567static uint64_t do_st_leN(CPUState *cpu, MMULookupPageData *p,
2568                          uint64_t val_le, int mmu_idx,
2569                          MemOp mop, uintptr_t ra)
2570{
2571    MemOp atom;
2572    unsigned tmp, half_size;
2573
2574    if (unlikely(p->flags & TLB_MMIO)) {
2575        return do_st_mmio_leN(cpu, p->full, val_le, p->addr,
2576                              p->size, mmu_idx, ra);
2577    } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2578        return val_le >> (p->size * 8);
2579    }
2580
2581    /*
2582     * It is a given that we cross a page and therefore there is no atomicity
2583     * for the store as a whole, but subobjects may need attention.
2584     */
2585    atom = mop & MO_ATOM_MASK;
2586    switch (atom) {
2587    case MO_ATOM_SUBALIGN:
2588        return store_parts_leN(p->haddr, p->size, val_le);
2589
2590    case MO_ATOM_IFALIGN_PAIR:
2591    case MO_ATOM_WITHIN16_PAIR:
2592        tmp = mop & MO_SIZE;
2593        tmp = tmp ? tmp - 1 : 0;
2594        half_size = 1 << tmp;
2595        if (atom == MO_ATOM_IFALIGN_PAIR
2596            ? p->size == half_size
2597            : p->size >= half_size) {
2598            if (!HAVE_al8_fast && p->size <= 4) {
2599                return store_whole_le4(p->haddr, p->size, val_le);
2600            } else if (HAVE_al8) {
2601                return store_whole_le8(p->haddr, p->size, val_le);
2602            } else {
2603                cpu_loop_exit_atomic(cpu, ra);
2604            }
2605        }
2606        /* fall through */
2607
2608    case MO_ATOM_IFALIGN:
2609    case MO_ATOM_WITHIN16:
2610    case MO_ATOM_NONE:
2611        return store_bytes_leN(p->haddr, p->size, val_le);
2612
2613    default:
2614        g_assert_not_reached();
2615    }
2616}
2617
2618/*
2619 * Wrapper for the above, for 8 < size < 16.
2620 */
2621static uint64_t do_st16_leN(CPUState *cpu, MMULookupPageData *p,
2622                            Int128 val_le, int mmu_idx,
2623                            MemOp mop, uintptr_t ra)
2624{
2625    int size = p->size;
2626    MemOp atom;
2627
2628    if (unlikely(p->flags & TLB_MMIO)) {
2629        return do_st16_mmio_leN(cpu, p->full, val_le, p->addr,
2630                                size, mmu_idx, ra);
2631    } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2632        return int128_gethi(val_le) >> ((size - 8) * 8);
2633    }
2634
2635    /*
2636     * It is a given that we cross a page and therefore there is no atomicity
2637     * for the store as a whole, but subobjects may need attention.
2638     */
2639    atom = mop & MO_ATOM_MASK;
2640    switch (atom) {
2641    case MO_ATOM_SUBALIGN:
2642        store_parts_leN(p->haddr, 8, int128_getlo(val_le));
2643        return store_parts_leN(p->haddr + 8, p->size - 8,
2644                               int128_gethi(val_le));
2645
2646    case MO_ATOM_WITHIN16_PAIR:
2647        /* Since size > 8, this is the half that must be atomic. */
2648        if (!HAVE_CMPXCHG128) {
2649            cpu_loop_exit_atomic(cpu, ra);
2650        }
2651        return store_whole_le16(p->haddr, p->size, val_le);
2652
2653    case MO_ATOM_IFALIGN_PAIR:
2654        /*
2655         * Since size > 8, both halves are misaligned,
2656         * and so neither is atomic.
2657         */
2658    case MO_ATOM_IFALIGN:
2659    case MO_ATOM_WITHIN16:
2660    case MO_ATOM_NONE:
2661        stq_le_p(p->haddr, int128_getlo(val_le));
2662        return store_bytes_leN(p->haddr + 8, p->size - 8,
2663                               int128_gethi(val_le));
2664
2665    default:
2666        g_assert_not_reached();
2667    }
2668}
2669
2670static void do_st_1(CPUState *cpu, MMULookupPageData *p, uint8_t val,
2671                    int mmu_idx, uintptr_t ra)
2672{
2673    if (unlikely(p->flags & TLB_MMIO)) {
2674        do_st_mmio_leN(cpu, p->full, val, p->addr, 1, mmu_idx, ra);
2675    } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2676        /* nothing */
2677    } else {
2678        *(uint8_t *)p->haddr = val;
2679    }
2680}
2681
2682static void do_st_2(CPUState *cpu, MMULookupPageData *p, uint16_t val,
2683                    int mmu_idx, MemOp memop, uintptr_t ra)
2684{
2685    if (unlikely(p->flags & TLB_MMIO)) {
2686        if ((memop & MO_BSWAP) != MO_LE) {
2687            val = bswap16(val);
2688        }
2689        do_st_mmio_leN(cpu, p->full, val, p->addr, 2, mmu_idx, ra);
2690    } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2691        /* nothing */
2692    } else {
2693        /* Swap to host endian if necessary, then store. */
2694        if (memop & MO_BSWAP) {
2695            val = bswap16(val);
2696        }
2697        store_atom_2(cpu, ra, p->haddr, memop, val);
2698    }
2699}
2700
2701static void do_st_4(CPUState *cpu, MMULookupPageData *p, uint32_t val,
2702                    int mmu_idx, MemOp memop, uintptr_t ra)
2703{
2704    if (unlikely(p->flags & TLB_MMIO)) {
2705        if ((memop & MO_BSWAP) != MO_LE) {
2706            val = bswap32(val);
2707        }
2708        do_st_mmio_leN(cpu, p->full, val, p->addr, 4, mmu_idx, ra);
2709    } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2710        /* nothing */
2711    } else {
2712        /* Swap to host endian if necessary, then store. */
2713        if (memop & MO_BSWAP) {
2714            val = bswap32(val);
2715        }
2716        store_atom_4(cpu, ra, p->haddr, memop, val);
2717    }
2718}
2719
2720static void do_st_8(CPUState *cpu, MMULookupPageData *p, uint64_t val,
2721                    int mmu_idx, MemOp memop, uintptr_t ra)
2722{
2723    if (unlikely(p->flags & TLB_MMIO)) {
2724        if ((memop & MO_BSWAP) != MO_LE) {
2725            val = bswap64(val);
2726        }
2727        do_st_mmio_leN(cpu, p->full, val, p->addr, 8, mmu_idx, ra);
2728    } else if (unlikely(p->flags & TLB_DISCARD_WRITE)) {
2729        /* nothing */
2730    } else {
2731        /* Swap to host endian if necessary, then store. */
2732        if (memop & MO_BSWAP) {
2733            val = bswap64(val);
2734        }
2735        store_atom_8(cpu, ra, p->haddr, memop, val);
2736    }
2737}
2738
2739static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
2740                       MemOpIdx oi, uintptr_t ra)
2741{
2742    MMULookupLocals l;
2743    bool crosspage;
2744
2745    cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
2746    crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
2747    tcg_debug_assert(!crosspage);
2748
2749    do_st_1(cpu, &l.page[0], val, l.mmu_idx, ra);
2750}
2751
2752static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
2753                       MemOpIdx oi, uintptr_t ra)
2754{
2755    MMULookupLocals l;
2756    bool crosspage;
2757    uint8_t a, b;
2758
2759    cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
2760    crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
2761    if (likely(!crosspage)) {
2762        do_st_2(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2763        return;
2764    }
2765
2766    if ((l.memop & MO_BSWAP) == MO_LE) {
2767        a = val, b = val >> 8;
2768    } else {
2769        b = val, a = val >> 8;
2770    }
2771    do_st_1(cpu, &l.page[0], a, l.mmu_idx, ra);
2772    do_st_1(cpu, &l.page[1], b, l.mmu_idx, ra);
2773}
2774
2775static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
2776                       MemOpIdx oi, uintptr_t ra)
2777{
2778    MMULookupLocals l;
2779    bool crosspage;
2780
2781    cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
2782    crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
2783    if (likely(!crosspage)) {
2784        do_st_4(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2785        return;
2786    }
2787
2788    /* Swap to little endian for simplicity, then store by bytes. */
2789    if ((l.memop & MO_BSWAP) != MO_LE) {
2790        val = bswap32(val);
2791    }
2792    val = do_st_leN(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2793    (void) do_st_leN(cpu, &l.page[1], val, l.mmu_idx, l.memop, ra);
2794}
2795
2796static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
2797                       MemOpIdx oi, uintptr_t ra)
2798{
2799    MMULookupLocals l;
2800    bool crosspage;
2801
2802    cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
2803    crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
2804    if (likely(!crosspage)) {
2805        do_st_8(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2806        return;
2807    }
2808
2809    /* Swap to little endian for simplicity, then store by bytes. */
2810    if ((l.memop & MO_BSWAP) != MO_LE) {
2811        val = bswap64(val);
2812    }
2813    val = do_st_leN(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2814    (void) do_st_leN(cpu, &l.page[1], val, l.mmu_idx, l.memop, ra);
2815}
2816
2817static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
2818                        MemOpIdx oi, uintptr_t ra)
2819{
2820    MMULookupLocals l;
2821    bool crosspage;
2822    uint64_t a, b;
2823    int first;
2824
2825    cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
2826    crosspage = mmu_lookup(cpu, addr, oi, ra, MMU_DATA_STORE, &l);
2827    if (likely(!crosspage)) {
2828        if (unlikely(l.page[0].flags & TLB_MMIO)) {
2829            if ((l.memop & MO_BSWAP) != MO_LE) {
2830                val = bswap128(val);
2831            }
2832            do_st16_mmio_leN(cpu, l.page[0].full, val, addr, 16, l.mmu_idx, ra);
2833        } else if (unlikely(l.page[0].flags & TLB_DISCARD_WRITE)) {
2834            /* nothing */
2835        } else {
2836            /* Swap to host endian if necessary, then store. */
2837            if (l.memop & MO_BSWAP) {
2838                val = bswap128(val);
2839            }
2840            store_atom_16(cpu, ra, l.page[0].haddr, l.memop, val);
2841        }
2842        return;
2843    }
2844
2845    first = l.page[0].size;
2846    if (first == 8) {
2847        MemOp mop8 = (l.memop & ~(MO_SIZE | MO_BSWAP)) | MO_64;
2848
2849        if (l.memop & MO_BSWAP) {
2850            val = bswap128(val);
2851        }
2852        if (HOST_BIG_ENDIAN) {
2853            b = int128_getlo(val), a = int128_gethi(val);
2854        } else {
2855            a = int128_getlo(val), b = int128_gethi(val);
2856        }
2857        do_st_8(cpu, &l.page[0], a, l.mmu_idx, mop8, ra);
2858        do_st_8(cpu, &l.page[1], b, l.mmu_idx, mop8, ra);
2859        return;
2860    }
2861
2862    if ((l.memop & MO_BSWAP) != MO_LE) {
2863        val = bswap128(val);
2864    }
2865    if (first < 8) {
2866        do_st_leN(cpu, &l.page[0], int128_getlo(val), l.mmu_idx, l.memop, ra);
2867        val = int128_urshift(val, first * 8);
2868        do_st16_leN(cpu, &l.page[1], val, l.mmu_idx, l.memop, ra);
2869    } else {
2870        b = do_st16_leN(cpu, &l.page[0], val, l.mmu_idx, l.memop, ra);
2871        do_st_leN(cpu, &l.page[1], b, l.mmu_idx, l.memop, ra);
2872    }
2873}
2874
2875#include "ldst_common.c.inc"
2876
/*
 * First set of functions passes in OI and RETADDR.
 * This makes them callable from other helpers.
 */

/*
 * Name generator for the templated helpers: pastes together
 * cpu_atomic_<X>, SUFFIX, END and "_mmu".  SUFFIX and END are not
 * defined in this part of the file; presumably atomic_template.h
 * provides them per DATA_SIZE -- TODO confirm.
 */
#define ATOMIC_NAME(X) \
    glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)

/* Defined empty here: no cleanup text is substituted where it is used. */
#define ATOMIC_MMU_CLEANUP

#include "atomic_common.c.inc"

/*
 * Instantiate the template once per access size.
 * NOTE(review): DATA_SIZE is redefined below without an #undef in
 * between, so atomic_template.h presumably #undefs it itself -- confirm.
 */
#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

/* 8-byte variants are only built when CONFIG_ATOMIC64 is defined. */
#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

/*
 * 16-byte variants are only built when CONFIG_ATOMIC128 is defined or
 * HAVE_CMPXCHG128 is nonzero.
 */
#if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif
2907
2908/* Code access functions.  */
2909
2910uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr,
2911                         MemOpIdx oi, uintptr_t retaddr)
2912{
2913    return do_ld1_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
2914}
2915
2916uint16_t cpu_ldw_code_mmu(CPUArchState *env, vaddr addr,
2917                          MemOpIdx oi, uintptr_t retaddr)
2918{
2919    return do_ld2_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
2920}
2921
2922uint32_t cpu_ldl_code_mmu(CPUArchState *env, vaddr addr,
2923                          MemOpIdx oi, uintptr_t retaddr)
2924{
2925    return do_ld4_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
2926}
2927
2928uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr,
2929                          MemOpIdx oi, uintptr_t retaddr)
2930{
2931    return do_ld8_mmu(env_cpu(env), addr, oi, retaddr, MMU_INST_FETCH);
2932}
2933
2934/*
2935 * Common pointer_wrap implementations.
2936 */
2937
/*
 * To be used for strict alignment targets.
 * Because no accesses are unaligned, no accesses wrap either.
 *
 * This implementation is therefore never expected to run: all four
 * arguments are ignored and any call trips g_assert_not_reached().
 */
vaddr cpu_pointer_wrap_notreached(CPUState *cs, int idx, vaddr res, vaddr base)
{
    g_assert_not_reached();
}
2946
2947/* To be used for strict 32-bit targets. */
2948vaddr cpu_pointer_wrap_uint32(CPUState *cs, int idx, vaddr res, vaddr base)
2949{
2950    return (uint32_t)res;
2951}
2952