qemu/accel/tcg/user-exec.c
<<
>>
Prefs
   1/*
   2 *  User emulator execution
   3 *
   4 *  Copyright (c) 2003-2005 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19#include "qemu/osdep.h"
  20#include "accel/tcg/cpu-ops.h"
  21#include "disas/disas.h"
  22#include "exec/vaddr.h"
  23#include "exec/tlb-flags.h"
  24#include "tcg/tcg.h"
  25#include "qemu/bitops.h"
  26#include "qemu/rcu.h"
  27#include "accel/tcg/cpu-ldst-common.h"
  28#include "accel/tcg/helper-retaddr.h"
  29#include "accel/tcg/probe.h"
  30#include "user/cpu_loop.h"
  31#include "user/guest-host.h"
  32#include "qemu/main-loop.h"
  33#include "user/page-protection.h"
  34#include "exec/page-protection.h"
  35#include "exec/helper-proto-common.h"
  36#include "qemu/atomic128.h"
  37#include "qemu/bswap.h"
  38#include "qemu/int128.h"
  39#include "trace.h"
  40#include "tcg/tcg-ldst.h"
  41#include "backend-ldst.h"
  42#include "internal-common.h"
  43#include "tb-internal.h"
  44
/*
 * Per-thread marker consulted by adjust_signal_pc() when a host fault is
 * being handled:
 *   0     - the fault is attributed to generated code;
 *   1     - the fault happened while reading guest code for translation;
 *   other - taken to be the host return address of the running helper.
 */
__thread uintptr_t helper_retaddr;
  46
  47//#define DEBUG_SIGNAL
  48
  49void cpu_interrupt(CPUState *cpu, int mask)
  50{
  51    g_assert(bql_locked());
  52    cpu->interrupt_request |= mask;
  53    qatomic_set(&cpu->neg.icount_decr.u16.high, -1);
  54}
  55
  56/*
  57 * Adjust the pc to pass to cpu_restore_state; return the memop type.
  58 */
  59MMUAccessType adjust_signal_pc(uintptr_t *pc, bool is_write)
  60{
  61    switch (helper_retaddr) {
  62    default:
  63        /*
  64         * Fault during host memory operation within a helper function.
  65         * The helper's host return address, saved here, gives us a
  66         * pointer into the generated code that will unwind to the
  67         * correct guest pc.
  68         */
  69        *pc = helper_retaddr;
  70        break;
  71
  72    case 0:
  73        /*
  74         * Fault during host memory operation within generated code.
  75         * (Or, a unrelated bug within qemu, but we can't tell from here).
  76         *
  77         * We take the host pc from the signal frame.  However, we cannot
  78         * use that value directly.  Within cpu_restore_state_from_tb, we
  79         * assume PC comes from GETPC(), as used by the helper functions,
  80         * so we adjust the address by -GETPC_ADJ to form an address that
  81         * is within the call insn, so that the address does not accidentally
  82         * match the beginning of the next guest insn.  However, when the
  83         * pc comes from the signal frame it points to the actual faulting
  84         * host memory insn and not the return from a call insn.
  85         *
  86         * Therefore, adjust to compensate for what will be done later
  87         * by cpu_restore_state_from_tb.
  88         */
  89        *pc += GETPC_ADJ;
  90        break;
  91
  92    case 1:
  93        /*
  94         * Fault during host read for translation, or loosely, "execution".
  95         *
  96         * The guest pc is already pointing to the start of the TB for which
  97         * code is being generated.  If the guest translator manages the
  98         * page crossings correctly, this is exactly the correct address
  99         * (and if the translator doesn't handle page boundaries correctly
 100         * there's little we can do about that here).  Therefore, do not
 101         * trigger the unwinder.
 102         */
 103        *pc = 0;
 104        return MMU_INST_FETCH;
 105    }
 106
 107    return is_write ? MMU_DATA_STORE : MMU_DATA_LOAD;
 108}
 109
 110/**
 111 * handle_sigsegv_accerr_write:
 112 * @cpu: the cpu context
 113 * @old_set: the sigset_t from the signal ucontext_t
 114 * @host_pc: the host pc, adjusted for the signal
 115 * @guest_addr: the guest address of the fault
 116 *
 117 * Return true if the write fault has been handled, and should be re-tried.
 118 *
 119 * Note that it is important that we don't call page_unprotect() unless
 120 * this is really a "write to nonwritable page" fault, because
 121 * page_unprotect() assumes that if it is called for an access to
 122 * a page that's writable this means we had two threads racing and
 123 * another thread got there first and already made the page writable;
 124 * so we will retry the access. If we were to call page_unprotect()
 125 * for some other kind of fault that should really be passed to the
 126 * guest, we'd end up in an infinite loop of retrying the faulting access.
 127 */
 128bool handle_sigsegv_accerr_write(CPUState *cpu, sigset_t *old_set,
 129                                 uintptr_t host_pc, vaddr guest_addr)
 130{
 131    switch (page_unprotect(cpu, guest_addr, host_pc)) {
 132    case 0:
 133        /*
 134         * Fault not caused by a page marked unwritable to protect
 135         * cached translations, must be the guest binary's problem.
 136         */
 137        return false;
 138    case 1:
 139        /*
 140         * Fault caused by protection of cached translation; TBs
 141         * invalidated, so resume execution.
 142         */
 143        return true;
 144    case 2:
 145        /*
 146         * Fault caused by protection of cached translation, and the
 147         * currently executing TB was modified and must be exited immediately.
 148         */
 149        sigprocmask(SIG_SETMASK, old_set, NULL);
 150        cpu_loop_exit_noexc(cpu);
 151        /* NORETURN */
 152    default:
 153        g_assert_not_reached();
 154    }
 155}
 156
/*
 * One contiguous guest address range [itree.start, itree.last] whose
 * pages all carry the same PAGE_* flags.
 */
typedef struct PageFlagsNode {
    struct rcu_head rcu;        /* used by g_free_rcu() to defer the free */
    IntervalTreeNode itree;     /* linkage and range within pageflags_root */
    int flags;                  /* PAGE_* bits for the whole range */
} PageFlagsNode;

/* All PageFlagsNode ranges; modified with the mmap lock held. */
static IntervalTreeRoot pageflags_root;
 164
 165static PageFlagsNode *pageflags_find(vaddr start, vaddr last)
 166{
 167    IntervalTreeNode *n;
 168
 169    n = interval_tree_iter_first(&pageflags_root, start, last);
 170    return n ? container_of(n, PageFlagsNode, itree) : NULL;
 171}
 172
 173static PageFlagsNode *pageflags_next(PageFlagsNode *p, vaddr start, vaddr last)
 174{
 175    IntervalTreeNode *n;
 176
 177    n = interval_tree_iter_next(&p->itree, start, last);
 178    return n ? container_of(n, PageFlagsNode, itree) : NULL;
 179}
 180
 181int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
 182{
 183    IntervalTreeNode *n;
 184    int rc = 0;
 185
 186    mmap_lock();
 187    for (n = interval_tree_iter_first(&pageflags_root, 0, -1);
 188         n != NULL;
 189         n = interval_tree_iter_next(n, 0, -1)) {
 190        PageFlagsNode *p = container_of(n, PageFlagsNode, itree);
 191
 192        rc = fn(priv, n->start, n->last + 1, p->flags);
 193        if (rc != 0) {
 194            break;
 195        }
 196    }
 197    mmap_unlock();
 198
 199    return rc;
 200}
 201
 202static int dump_region(void *opaque, vaddr start, vaddr end, int prot)
 203{
 204    FILE *f = opaque;
 205    uint64_t mask;
 206    int width;
 207
 208    if (guest_addr_max <= UINT32_MAX) {
 209        mask = UINT32_MAX, width = 8;
 210    } else {
 211        mask = UINT64_MAX, width = 16;
 212    }
 213
 214    fprintf(f, "%0*" PRIx64 "-%0*" PRIx64 " %0*" PRIx64 " %c%c%c\n",
 215            width, start & mask,
 216            width, end & mask,
 217            width, (end - start) & mask,
 218            ((prot & PAGE_READ) ? 'r' : '-'),
 219            ((prot & PAGE_WRITE) ? 'w' : '-'),
 220            ((prot & PAGE_EXEC) ? 'x' : '-'));
 221    return 0;
 222}
 223
 224/* dump memory mappings */
 225void page_dump(FILE *f)
 226{
 227    int width = guest_addr_max <= UINT32_MAX ? 8 : 16;
 228
 229    fprintf(f, "%-*s %-*s %-*s %s\n",
 230            width, "start", width, "end", width, "size", "prot");
 231    walk_memory_regions(f, dump_region);
 232}
 233
 234int page_get_flags(vaddr address)
 235{
 236    PageFlagsNode *p = pageflags_find(address, address);
 237
 238    /*
 239     * See util/interval-tree.c re lockless lookups: no false positives but
 240     * there are false negatives.  If we find nothing, retry with the mmap
 241     * lock acquired.
 242     */
 243    if (p) {
 244        return p->flags;
 245    }
 246    if (have_mmap_lock()) {
 247        return 0;
 248    }
 249
 250    mmap_lock();
 251    p = pageflags_find(address, address);
 252    mmap_unlock();
 253    return p ? p->flags : 0;
 254}
 255
 256/* A subroutine of page_set_flags: insert a new node for [start,last]. */
 257static void pageflags_create(vaddr start, vaddr last, int flags)
 258{
 259    PageFlagsNode *p = g_new(PageFlagsNode, 1);
 260
 261    p->itree.start = start;
 262    p->itree.last = last;
 263    p->flags = flags;
 264    interval_tree_insert(&p->itree, &pageflags_root);
 265}
 266
 267/* A subroutine of page_set_flags: remove everything in [start,last]. */
 268static bool pageflags_unset(vaddr start, vaddr last)
 269{
 270    bool inval_tb = false;
 271
 272    while (true) {
 273        PageFlagsNode *p = pageflags_find(start, last);
 274        vaddr p_last;
 275
 276        if (!p) {
 277            break;
 278        }
 279
 280        if (p->flags & PAGE_EXEC) {
 281            inval_tb = true;
 282        }
 283
 284        interval_tree_remove(&p->itree, &pageflags_root);
 285        p_last = p->itree.last;
 286
 287        if (p->itree.start < start) {
 288            /* Truncate the node from the end, or split out the middle. */
 289            p->itree.last = start - 1;
 290            interval_tree_insert(&p->itree, &pageflags_root);
 291            if (last < p_last) {
 292                pageflags_create(last + 1, p_last, p->flags);
 293                break;
 294            }
 295        } else if (p_last <= last) {
 296            /* Range completely covers node -- remove it. */
 297            g_free_rcu(p, rcu);
 298        } else {
 299            /* Truncate the node from the start. */
 300            p->itree.start = last + 1;
 301            interval_tree_insert(&p->itree, &pageflags_root);
 302            break;
 303        }
 304    }
 305
 306    return inval_tb;
 307}
 308
 309/*
 310 * A subroutine of page_set_flags: nothing overlaps [start,last],
 311 * but check adjacent mappings and maybe merge into a single range.
 312 */
 313static void pageflags_create_merge(vaddr start, vaddr last, int flags)
 314{
 315    PageFlagsNode *next = NULL, *prev = NULL;
 316
 317    if (start > 0) {
 318        prev = pageflags_find(start - 1, start - 1);
 319        if (prev) {
 320            if (prev->flags == flags) {
 321                interval_tree_remove(&prev->itree, &pageflags_root);
 322            } else {
 323                prev = NULL;
 324            }
 325        }
 326    }
 327    if (last + 1 != 0) {
 328        next = pageflags_find(last + 1, last + 1);
 329        if (next) {
 330            if (next->flags == flags) {
 331                interval_tree_remove(&next->itree, &pageflags_root);
 332            } else {
 333                next = NULL;
 334            }
 335        }
 336    }
 337
 338    if (prev) {
 339        if (next) {
 340            prev->itree.last = next->itree.last;
 341            g_free_rcu(next, rcu);
 342        } else {
 343            prev->itree.last = last;
 344        }
 345        interval_tree_insert(&prev->itree, &pageflags_root);
 346    } else if (next) {
 347        next->itree.start = start;
 348        interval_tree_insert(&next->itree, &pageflags_root);
 349    } else {
 350        pageflags_create(start, last, flags);
 351    }
 352}
 353
/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY  0
#endif
/*
 * Flag bits that page_set_flags() preserves on an existing mapping
 * unless the caller passes PAGE_RESET.
 */
#define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
 362
/*
 * A subroutine of page_set_flags: update the flags of [start,last].
 *
 * Every existing node that overlaps the range gets its flags replaced by
 * (old_flags & ~clear_flags) | set_flags, being split at the range
 * boundaries where it sticks out.  Parts of the range with no node get
 * @set_flags (nothing is created when @set_flags is 0).  A node whose
 * resulting flags are 0 is removed.
 *
 * The range is processed one overlapping node at a time; "goto restart"
 * re-runs the lookup, usually after advancing @start past the part that
 * has been dealt with.
 *
 * Returns true if some executable range lost PAGE_EXEC or gained
 * PAGE_WRITE; page_set_flags() uses this to invalidate TBs.
 */
static bool pageflags_set_clear(vaddr start, vaddr last,
                                int set_flags, int clear_flags)
{
    PageFlagsNode *p;
    vaddr p_start, p_last;
    int p_flags, merge_flags;
    bool inval_tb = false;

 restart:
    p = pageflags_find(start, last);
    if (!p) {
        /* Nothing (left) in the range: just record the new flags, if any. */
        if (set_flags) {
            pageflags_create_merge(start, last, set_flags);
        }
        goto done;
    }

    /* Snapshot the node, since it may be resized or freed below. */
    p_start = p->itree.start;
    p_last = p->itree.last;
    p_flags = p->flags;
    /* Using mprotect on a page does not change sticky bits. */
    merge_flags = (p_flags & ~clear_flags) | set_flags;

    /*
     * Need to flush if an overlapping executable region
     * removes exec, or adds write.
     */
    if ((p_flags & PAGE_EXEC)
        && (!(merge_flags & PAGE_EXEC)
            || (merge_flags & ~p_flags & PAGE_WRITE))) {
        inval_tb = true;
    }

    /*
     * If there is an exact range match, update and return without
     * attempting to merge with adjacent regions.
     */
    if (start == p_start && last == p_last) {
        if (merge_flags) {
            p->flags = merge_flags;
        } else {
            interval_tree_remove(&p->itree, &pageflags_root);
            g_free_rcu(p, rcu);
        }
        goto done;
    }

    /*
     * If sticky bits affect the original mapping, then we must be more
     * careful about the existing intervals and the separate flags.
     */
    if (set_flags != merge_flags) {
        if (p_start < start) {
            /* Node starts before the range: shrink it to the head part. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.last = start - 1;
            interval_tree_insert(&p->itree, &pageflags_root);

            if (last < p_last) {
                /* Range is strictly inside the node: middle + old tail. */
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
                pageflags_create(last + 1, p_last, p_flags);
            } else {
                /* Range reaches the node's end: re-flag the overlap... */
                if (merge_flags) {
                    pageflags_create(start, p_last, merge_flags);
                }
                /* ...and carry on with whatever lies beyond the node. */
                if (p_last < last) {
                    start = p_last + 1;
                    goto restart;
                }
            }
        } else {
            /* Gap in front of the node gets the plain set_flags. */
            if (start < p_start && set_flags) {
                pageflags_create(start, p_start - 1, set_flags);
            }
            if (last < p_last) {
                /* Node runs past the range: keep its tail unchanged. */
                interval_tree_remove(&p->itree, &pageflags_root);
                p->itree.start = last + 1;
                interval_tree_insert(&p->itree, &pageflags_root);
                if (merge_flags) {
                    pageflags_create(start, last, merge_flags);
                }
            } else {
                /* Node is wholly inside the range: re-flag or drop it. */
                if (merge_flags) {
                    p->flags = merge_flags;
                } else {
                    interval_tree_remove(&p->itree, &pageflags_root);
                    g_free_rcu(p, rcu);
                }
                if (p_last < last) {
                    start = p_last + 1;
                    goto restart;
                }
            }
        }
        goto done;
    }

    /* If flags are not changing for this range, incorporate it. */
    if (set_flags == p_flags) {
        if (start < p_start) {
            /* Extend the node downwards to cover the leading gap. */
            interval_tree_remove(&p->itree, &pageflags_root);
            p->itree.start = start;
            interval_tree_insert(&p->itree, &pageflags_root);
        }
        if (p_last < last) {
            start = p_last + 1;
            goto restart;
        }
        goto done;
    }

    /* Maybe split out head and/or tail ranges with the original flags. */
    interval_tree_remove(&p->itree, &pageflags_root);
    if (p_start < start) {
        p->itree.last = start - 1;
        interval_tree_insert(&p->itree, &pageflags_root);

        /* More nodes may overlap the rest of the range: look again. */
        if (p_last < last) {
            goto restart;
        }
        if (last < p_last) {
            pageflags_create(last + 1, p_last, p_flags);
        }
    } else if (last < p_last) {
        p->itree.start = last + 1;
        interval_tree_insert(&p->itree, &pageflags_root);
    } else {
        /* Node fully covered by the range: discard it and look again. */
        g_free_rcu(p, rcu);
        goto restart;
    }
    if (set_flags) {
        pageflags_create(start, last, set_flags);
    }

 done:
    return inval_tb;
}
 502
 503void page_set_flags(vaddr start, vaddr last, int flags)
 504{
 505    bool reset = false;
 506    bool inval_tb = false;
 507
 508    /* This function should never be called with addresses outside the
 509       guest address space.  If this assert fires, it probably indicates
 510       a missing call to h2g_valid.  */
 511    assert(start <= last);
 512    assert(last <= guest_addr_max);
 513    /* Only set PAGE_ANON with new mappings. */
 514    assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
 515    assert_memory_lock();
 516
 517    start &= TARGET_PAGE_MASK;
 518    last |= ~TARGET_PAGE_MASK;
 519
 520    if (!(flags & PAGE_VALID)) {
 521        flags = 0;
 522    } else {
 523        reset = flags & PAGE_RESET;
 524        flags &= ~PAGE_RESET;
 525        if (flags & PAGE_WRITE) {
 526            flags |= PAGE_WRITE_ORG;
 527        }
 528    }
 529
 530    if (!flags || reset) {
 531        page_reset_target_data(start, last);
 532        inval_tb |= pageflags_unset(start, last);
 533    }
 534    if (flags) {
 535        inval_tb |= pageflags_set_clear(start, last, flags,
 536                                        ~(reset ? 0 : PAGE_STICKY));
 537    }
 538    if (inval_tb) {
 539        tb_invalidate_phys_range(NULL, start, last);
 540    }
 541}
 542
 543bool page_check_range(vaddr start, vaddr len, int flags)
 544{
 545    vaddr last;
 546    int locked;  /* tri-state: =0: unlocked, +1: global, -1: local */
 547    bool ret;
 548
 549    if (len == 0) {
 550        return true;  /* trivial length */
 551    }
 552
 553    last = start + len - 1;
 554    if (last < start) {
 555        return false; /* wrap around */
 556    }
 557
 558    locked = have_mmap_lock();
 559    while (true) {
 560        PageFlagsNode *p = pageflags_find(start, last);
 561        int missing;
 562
 563        if (!p) {
 564            if (!locked) {
 565                /*
 566                 * Lockless lookups have false negatives.
 567                 * Retry with the lock held.
 568                 */
 569                mmap_lock();
 570                locked = -1;
 571                p = pageflags_find(start, last);
 572            }
 573            if (!p) {
 574                ret = false; /* entire region invalid */
 575                break;
 576            }
 577        }
 578        if (start < p->itree.start) {
 579            ret = false; /* initial bytes invalid */
 580            break;
 581        }
 582
 583        missing = flags & ~p->flags;
 584        if (missing & ~PAGE_WRITE) {
 585            ret = false; /* page doesn't match */
 586            break;
 587        }
 588        if (missing & PAGE_WRITE) {
 589            if (!(p->flags & PAGE_WRITE_ORG)) {
 590                ret = false; /* page not writable */
 591                break;
 592            }
 593            /* Asking about writable, but has been protected: undo. */
 594            if (!page_unprotect(NULL, start, 0)) {
 595                ret = false;
 596                break;
 597            }
 598            /* TODO: page_unprotect should take a range, not a single page. */
 599            if (last - start < TARGET_PAGE_SIZE) {
 600                ret = true; /* ok */
 601                break;
 602            }
 603            start += TARGET_PAGE_SIZE;
 604            continue;
 605        }
 606
 607        if (last <= p->itree.last) {
 608            ret = true; /* ok */
 609            break;
 610        }
 611        start = p->itree.last + 1;
 612    }
 613
 614    /* Release the lock if acquired locally. */
 615    if (locked < 0) {
 616        mmap_unlock();
 617    }
 618    return ret;
 619}
 620
 621bool page_check_range_empty(vaddr start, vaddr last)
 622{
 623    assert(last >= start);
 624    assert_memory_lock();
 625    return pageflags_find(start, last) == NULL;
 626}
 627
 628vaddr page_find_range_empty(vaddr min, vaddr max, vaddr len, vaddr align)
 629{
 630    vaddr len_m1, align_m1;
 631
 632    assert(min <= max);
 633    assert(max <= guest_addr_max);
 634    assert(len != 0);
 635    assert(is_power_of_2(align));
 636    assert_memory_lock();
 637
 638    len_m1 = len - 1;
 639    align_m1 = align - 1;
 640
 641    /* Iteratively narrow the search region. */
 642    while (1) {
 643        PageFlagsNode *p;
 644
 645        /* Align min and double-check there's enough space remaining. */
 646        min = (min + align_m1) & ~align_m1;
 647        if (min > max) {
 648            return -1;
 649        }
 650        if (len_m1 > max - min) {
 651            return -1;
 652        }
 653
 654        p = pageflags_find(min, min + len_m1);
 655        if (p == NULL) {
 656            /* Found! */
 657            return min;
 658        }
 659        if (max <= p->itree.last) {
 660            /* Existing allocation fills the remainder of the search region. */
 661            return -1;
 662        }
 663        /* Skip across existing allocation. */
 664        min = p->itree.last + 1;
 665    }
 666}
 667
 668void tb_lock_page0(tb_page_addr_t address)
 669{
 670    PageFlagsNode *p;
 671    vaddr start, last;
 672    int host_page_size = qemu_real_host_page_size();
 673    int prot;
 674
 675    assert_memory_lock();
 676
 677    if (host_page_size <= TARGET_PAGE_SIZE) {
 678        start = address & TARGET_PAGE_MASK;
 679        last = start + TARGET_PAGE_SIZE - 1;
 680    } else {
 681        start = address & -host_page_size;
 682        last = start + host_page_size - 1;
 683    }
 684
 685    p = pageflags_find(start, last);
 686    if (!p) {
 687        return;
 688    }
 689    prot = p->flags;
 690
 691    if (unlikely(p->itree.last < last)) {
 692        /* More than one protection region covers the one host page. */
 693        assert(TARGET_PAGE_SIZE < host_page_size);
 694        while ((p = pageflags_next(p, start, last)) != NULL) {
 695            prot |= p->flags;
 696        }
 697    }
 698
 699    if (prot & PAGE_WRITE) {
 700        pageflags_set_clear(start, last, 0, PAGE_WRITE);
 701        mprotect(g2h_untagged(start), last - start + 1,
 702                 prot & (PAGE_READ | PAGE_EXEC) ? PROT_READ : PROT_NONE);
 703    }
 704}
 705
/*
 * Called from signal handler: invalidate the code and unprotect the
 * page. Return 0 if the fault was not handled, 1 if it was handled,
 * and 2 if it was handled but the caller must cause the TB to be
 * immediately exited. (We can only return 2 if the 'pc' argument is
 * non-zero.)
 *
 * @cpu and @pc must be given together or not at all: either both are
 * non-zero, or @cpu is NULL and @pc is 0 (as page_check_range does).
 */
int page_unprotect(CPUState *cpu, tb_page_addr_t address, uintptr_t pc)
{
    PageFlagsNode *p;
    bool current_tb_invalidated;

    assert((cpu == NULL) == (pc == 0));

    /*
     * Technically this isn't safe inside a signal handler.  However we
     * know this only ever happens in a synchronous SEGV handler, so in
     * practice it seems to be ok.
     */
    mmap_lock();

    p = pageflags_find(address, address);

    /* If this address was not really writable, nothing to do. */
    if (!p || !(p->flags & PAGE_WRITE_ORG)) {
        mmap_unlock();
        return 0;
    }

    current_tb_invalidated = false;
    if (p->flags & PAGE_WRITE) {
        /*
         * If the page is actually marked WRITE then assume this is because
         * this thread raced with another one which got here first and
         * set the page to PAGE_WRITE and did the TB invalidate for us.
         */
        if (pc && cpu->cc->tcg_ops->precise_smc) {
            /* Still report whether the TB we are running in was hit. */
            TranslationBlock *current_tb = tcg_tb_lookup(pc);
            if (current_tb) {
                current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
            }
        }
    } else {
        int host_page_size = qemu_real_host_page_size();
        vaddr start, len, i;
        int prot;

        if (host_page_size <= TARGET_PAGE_SIZE) {
            /* One target page is the unit of protection. */
            start = address & TARGET_PAGE_MASK;
            len = TARGET_PAGE_SIZE;
            prot = p->flags | PAGE_WRITE;
            pageflags_set_clear(start, start + len - 1, PAGE_WRITE, 0);
            current_tb_invalidated =
                tb_invalidate_phys_page_unwind(cpu, start, pc);
        } else {
            /*
             * A host page spans several target pages: visit each one,
             * accumulating the union of their permissions for mprotect.
             */
            start = address & -host_page_size;
            len = host_page_size;
            prot = 0;

            for (i = 0; i < len; i += TARGET_PAGE_SIZE) {
                vaddr addr = start + i;

                p = pageflags_find(addr, addr);
                if (p) {
                    prot |= p->flags;
                    /* Only pages that were originally writable regain it. */
                    if (p->flags & PAGE_WRITE_ORG) {
                        prot |= PAGE_WRITE;
                        pageflags_set_clear(addr, addr + TARGET_PAGE_SIZE - 1,
                                            PAGE_WRITE, 0);
                    }
                }
                /*
                 * Since the content will be modified, we must invalidate
                 * the corresponding translated code.
                 */
                current_tb_invalidated |=
                    tb_invalidate_phys_page_unwind(cpu, addr, pc);
            }
        }
        /* Guest-executable pages are mapped readable, not executable. */
        if (prot & PAGE_EXEC) {
            prot = (prot & ~PAGE_EXEC) | PAGE_READ;
        }
        mprotect((void *)g2h_untagged(start), len, prot & PAGE_RWX);
    }
    mmap_unlock();

    /* If current TB was invalidated return to main loop */
    return current_tb_invalidated ? 2 : 1;
}
 795
 796static int probe_access_internal(CPUArchState *env, vaddr addr,
 797                                 int fault_size, MMUAccessType access_type,
 798                                 bool nonfault, uintptr_t ra)
 799{
 800    int acc_flag;
 801    bool maperr;
 802
 803    switch (access_type) {
 804    case MMU_DATA_STORE:
 805        acc_flag = PAGE_WRITE_ORG;
 806        break;
 807    case MMU_DATA_LOAD:
 808        acc_flag = PAGE_READ;
 809        break;
 810    case MMU_INST_FETCH:
 811        acc_flag = PAGE_EXEC;
 812        break;
 813    default:
 814        g_assert_not_reached();
 815    }
 816
 817    if (guest_addr_valid_untagged(addr)) {
 818        int page_flags = page_get_flags(addr);
 819        if (page_flags & acc_flag) {
 820            if (access_type != MMU_INST_FETCH
 821                && cpu_plugin_mem_cbs_enabled(env_cpu(env))) {
 822                return TLB_MMIO;
 823            }
 824            return 0; /* success */
 825        }
 826        maperr = !(page_flags & PAGE_VALID);
 827    } else {
 828        maperr = true;
 829    }
 830
 831    if (nonfault) {
 832        return TLB_INVALID_MASK;
 833    }
 834
 835    cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
 836}
 837
 838int probe_access_flags(CPUArchState *env, vaddr addr, int size,
 839                       MMUAccessType access_type, int mmu_idx,
 840                       bool nonfault, void **phost, uintptr_t ra)
 841{
 842    int flags;
 843
 844    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
 845    flags = probe_access_internal(env, addr, size, access_type, nonfault, ra);
 846    *phost = (flags & TLB_INVALID_MASK) ? NULL : g2h(env_cpu(env), addr);
 847    return flags;
 848}
 849
 850void *probe_access(CPUArchState *env, vaddr addr, int size,
 851                   MMUAccessType access_type, int mmu_idx, uintptr_t ra)
 852{
 853    int flags;
 854
 855    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
 856    flags = probe_access_internal(env, addr, size, access_type, false, ra);
 857    g_assert((flags & ~TLB_MMIO) == 0);
 858
 859    return size ? g2h(env_cpu(env), addr) : NULL;
 860}
 861
 862void *tlb_vaddr_to_host(CPUArchState *env, vaddr addr,
 863                        MMUAccessType access_type, int mmu_idx)
 864{
 865    return g2h(env_cpu(env), addr);
 866}
 867
 868tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
 869                                        void **hostp)
 870{
 871    int flags;
 872
 873    flags = probe_access_internal(env, addr, 1, MMU_INST_FETCH, false, 0);
 874    g_assert(flags == 0);
 875
 876    if (hostp) {
 877        *hostp = g2h_untagged(addr);
 878    }
 879    return addr;
 880}
 881
/*
 * Allocate chunks of target data together.  For the only current user,
 * if we allocate one hunk per page, we have overhead of 40/128, i.e.
 * roughly 31%.
 * Therefore, allocate memory for 64 pages at a time for overhead < 1%.
 */
#define TPD_PAGES  64
/* Masks an address down to the start of its TPD_PAGES-page chunk. */
#define TBD_MASK   (TARGET_PAGE_MASK * TPD_PAGES)
 889
/*
 * Per-page target data for one chunk of TPD_PAGES consecutive target
 * pages; [itree.start, itree.last] is the guest range of the chunk.
 */
typedef struct TargetPageDataNode {
    struct rcu_head rcu;        /* used by g_free_rcu() to defer the free */
    IntervalTreeNode itree;     /* linkage and range within targetdata_root */
    /* TPD_PAGES slots of target_page_data_size bytes, one per page. */
    char data[] __attribute__((aligned));
} TargetPageDataNode;

/* All TargetPageDataNode chunks; modified with the mmap lock held. */
static IntervalTreeRoot targetdata_root;
/* Per-page slot size, latched by the first page_get_target_data() call. */
static size_t target_page_data_size;
 898
 899void page_reset_target_data(vaddr start, vaddr last)
 900{
 901    IntervalTreeNode *n, *next;
 902    size_t size = target_page_data_size;
 903
 904    if (likely(size == 0)) {
 905        return;
 906    }
 907
 908    assert_memory_lock();
 909
 910    start &= TARGET_PAGE_MASK;
 911    last |= ~TARGET_PAGE_MASK;
 912
 913    for (n = interval_tree_iter_first(&targetdata_root, start, last),
 914         next = n ? interval_tree_iter_next(n, start, last) : NULL;
 915         n != NULL;
 916         n = next,
 917         next = next ? interval_tree_iter_next(n, start, last) : NULL) {
 918        vaddr n_start, n_last, p_ofs, p_len;
 919        TargetPageDataNode *t = container_of(n, TargetPageDataNode, itree);
 920
 921        if (n->start >= start && n->last <= last) {
 922            interval_tree_remove(n, &targetdata_root);
 923            g_free_rcu(t, rcu);
 924            continue;
 925        }
 926
 927        if (n->start < start) {
 928            n_start = start;
 929            p_ofs = (start - n->start) >> TARGET_PAGE_BITS;
 930        } else {
 931            n_start = n->start;
 932            p_ofs = 0;
 933        }
 934        n_last = MIN(last, n->last);
 935        p_len = (n_last + 1 - n_start) >> TARGET_PAGE_BITS;
 936
 937        memset(t->data + p_ofs * size, 0, p_len * size);
 938    }
 939}
 940
 941void *page_get_target_data(vaddr address, size_t size)
 942{
 943    IntervalTreeNode *n;
 944    TargetPageDataNode *t;
 945    vaddr page, region, p_ofs;
 946
 947    /* Remember the size from the first call, and it should be constant. */
 948    if (unlikely(target_page_data_size != size)) {
 949        assert(target_page_data_size == 0);
 950        target_page_data_size = size;
 951    }
 952
 953    page = address & TARGET_PAGE_MASK;
 954    region = address & TBD_MASK;
 955
 956    n = interval_tree_iter_first(&targetdata_root, page, page);
 957    if (!n) {
 958        /*
 959         * See util/interval-tree.c re lockless lookups: no false positives
 960         * but there are false negatives.  If we find nothing, retry with
 961         * the mmap lock acquired.  We also need the lock for the
 962         * allocation + insert.
 963         */
 964        mmap_lock();
 965        n = interval_tree_iter_first(&targetdata_root, page, page);
 966        if (!n) {
 967            t = g_malloc0(sizeof(TargetPageDataNode) + TPD_PAGES * size);
 968            n = &t->itree;
 969            n->start = region;
 970            n->last = region | ~TBD_MASK;
 971            interval_tree_insert(n, &targetdata_root);
 972        }
 973        mmap_unlock();
 974    }
 975
 976    t = container_of(n, TargetPageDataNode, itree);
 977    p_ofs = (page - region) >> TARGET_PAGE_BITS;
 978    return t->data + p_ofs * size;
 979}
 980
 981/* The system-mode versions of these helpers are in cputlb.c.  */
 982
 983static void *cpu_mmu_lookup(CPUState *cpu, vaddr addr,
 984                            MemOp mop, uintptr_t ra, MMUAccessType type)
 985{
 986    int a_bits = memop_alignment_bits(mop);
 987    void *ret;
 988
 989    /* Enforce guest required alignment.  */
 990    if (unlikely(addr & ((1 << a_bits) - 1))) {
 991        cpu_loop_exit_sigbus(cpu, addr, type, ra);
 992    }
 993
 994    ret = g2h(cpu, addr);
 995    set_helper_retaddr(ra);
 996    return ret;
 997}
 998
 999/* physical memory access (slow version, mainly for debug) */
1000int cpu_memory_rw_debug(CPUState *cpu, vaddr addr,
1001                        void *ptr, size_t len, bool is_write)
1002{
1003    int flags;
1004    vaddr l, page;
1005    uint8_t *buf = ptr;
1006    ssize_t written;
1007    int ret = -1;
1008    int fd = -1;
1009
1010    mmap_lock();
1011
1012    while (len > 0) {
1013        page = addr & TARGET_PAGE_MASK;
1014        l = (page + TARGET_PAGE_SIZE) - addr;
1015        if (l > len) {
1016            l = len;
1017        }
1018        flags = page_get_flags(page);
1019        if (!(flags & PAGE_VALID)) {
1020            goto out_close;
1021        }
1022        if (is_write) {
1023            if (flags & PAGE_WRITE) {
1024                memcpy(g2h(cpu, addr), buf, l);
1025            } else {
1026                /* Bypass the host page protection using ptrace. */
1027                if (fd == -1) {
1028                    fd = open("/proc/self/mem", O_WRONLY);
1029                    if (fd == -1) {
1030                        goto out;
1031                    }
1032                }
1033                /*
1034                 * If there is a TranslationBlock and we weren't bypassing the
1035                 * host page protection, the memcpy() above would SEGV,
1036                 * ultimately leading to page_unprotect(). So invalidate the
1037                 * translations manually. Both invalidation and pwrite() must
1038                 * be under mmap_lock() in order to prevent the creation of
1039                 * another TranslationBlock in between.
1040                 */
1041                tb_invalidate_phys_range(NULL, addr, addr + l - 1);
1042                written = pwrite(fd, buf, l,
1043                                 (off_t)(uintptr_t)g2h_untagged(addr));
1044                if (written != l) {
1045                    goto out_close;
1046                }
1047            }
1048        } else if (flags & PAGE_READ) {
1049            memcpy(buf, g2h(cpu, addr), l);
1050        } else {
1051            /* Bypass the host page protection using ptrace. */
1052            if (fd == -1) {
1053                fd = open("/proc/self/mem", O_RDONLY);
1054                if (fd == -1) {
1055                    goto out;
1056                }
1057            }
1058            if (pread(fd, buf, l,
1059                      (off_t)(uintptr_t)g2h_untagged(addr)) != l) {
1060                goto out_close;
1061            }
1062        }
1063        len -= l;
1064        buf += l;
1065        addr += l;
1066    }
1067    ret = 0;
1068out_close:
1069    if (fd != -1) {
1070        close(fd);
1071    }
1072out:
1073    mmap_unlock();
1074
1075    return ret;
1076}
1077
1078#include "ldst_atomicity.c.inc"
1079
1080static uint8_t do_ld1_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1081                          uintptr_t ra, MMUAccessType access_type)
1082{
1083    void *haddr;
1084    uint8_t ret;
1085
1086    cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1087    haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, access_type);
1088    ret = ldub_p(haddr);
1089    clear_helper_retaddr();
1090    return ret;
1091}
1092
1093static uint16_t do_ld2_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1094                           uintptr_t ra, MMUAccessType access_type)
1095{
1096    void *haddr;
1097    uint16_t ret;
1098    MemOp mop = get_memop(oi);
1099
1100    cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1101    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1102    ret = load_atom_2(cpu, ra, haddr, mop);
1103    clear_helper_retaddr();
1104
1105    if (mop & MO_BSWAP) {
1106        ret = bswap16(ret);
1107    }
1108    return ret;
1109}
1110
1111static uint32_t do_ld4_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1112                           uintptr_t ra, MMUAccessType access_type)
1113{
1114    void *haddr;
1115    uint32_t ret;
1116    MemOp mop = get_memop(oi);
1117
1118    cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1119    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1120    ret = load_atom_4(cpu, ra, haddr, mop);
1121    clear_helper_retaddr();
1122
1123    if (mop & MO_BSWAP) {
1124        ret = bswap32(ret);
1125    }
1126    return ret;
1127}
1128
1129static uint64_t do_ld8_mmu(CPUState *cpu, vaddr addr, MemOpIdx oi,
1130                           uintptr_t ra, MMUAccessType access_type)
1131{
1132    void *haddr;
1133    uint64_t ret;
1134    MemOp mop = get_memop(oi);
1135
1136    cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1137    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, access_type);
1138    ret = load_atom_8(cpu, ra, haddr, mop);
1139    clear_helper_retaddr();
1140
1141    if (mop & MO_BSWAP) {
1142        ret = bswap64(ret);
1143    }
1144    return ret;
1145}
1146
1147static Int128 do_ld16_mmu(CPUState *cpu, vaddr addr,
1148                          MemOpIdx oi, uintptr_t ra)
1149{
1150    void *haddr;
1151    Int128 ret;
1152    MemOp mop = get_memop(oi);
1153
1154    tcg_debug_assert((mop & MO_SIZE) == MO_128);
1155    cpu_req_mo(cpu, TCG_MO_LD_LD | TCG_MO_ST_LD);
1156    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_LOAD);
1157    ret = load_atom_16(cpu, ra, haddr, mop);
1158    clear_helper_retaddr();
1159
1160    if (mop & MO_BSWAP) {
1161        ret = bswap128(ret);
1162    }
1163    return ret;
1164}
1165
1166static void do_st1_mmu(CPUState *cpu, vaddr addr, uint8_t val,
1167                       MemOpIdx oi, uintptr_t ra)
1168{
1169    void *haddr;
1170
1171    cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1172    haddr = cpu_mmu_lookup(cpu, addr, get_memop(oi), ra, MMU_DATA_STORE);
1173    stb_p(haddr, val);
1174    clear_helper_retaddr();
1175}
1176
1177static void do_st2_mmu(CPUState *cpu, vaddr addr, uint16_t val,
1178                       MemOpIdx oi, uintptr_t ra)
1179{
1180    void *haddr;
1181    MemOp mop = get_memop(oi);
1182
1183    cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1184    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1185
1186    if (mop & MO_BSWAP) {
1187        val = bswap16(val);
1188    }
1189    store_atom_2(cpu, ra, haddr, mop, val);
1190    clear_helper_retaddr();
1191}
1192
1193static void do_st4_mmu(CPUState *cpu, vaddr addr, uint32_t val,
1194                       MemOpIdx oi, uintptr_t ra)
1195{
1196    void *haddr;
1197    MemOp mop = get_memop(oi);
1198
1199    cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1200    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1201
1202    if (mop & MO_BSWAP) {
1203        val = bswap32(val);
1204    }
1205    store_atom_4(cpu, ra, haddr, mop, val);
1206    clear_helper_retaddr();
1207}
1208
1209static void do_st8_mmu(CPUState *cpu, vaddr addr, uint64_t val,
1210                       MemOpIdx oi, uintptr_t ra)
1211{
1212    void *haddr;
1213    MemOp mop = get_memop(oi);
1214
1215    cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1216    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1217
1218    if (mop & MO_BSWAP) {
1219        val = bswap64(val);
1220    }
1221    store_atom_8(cpu, ra, haddr, mop, val);
1222    clear_helper_retaddr();
1223}
1224
1225static void do_st16_mmu(CPUState *cpu, vaddr addr, Int128 val,
1226                        MemOpIdx oi, uintptr_t ra)
1227{
1228    void *haddr;
1229    MemOpIdx mop = get_memop(oi);
1230
1231    cpu_req_mo(cpu, TCG_MO_LD_ST | TCG_MO_ST_ST);
1232    haddr = cpu_mmu_lookup(cpu, addr, mop, ra, MMU_DATA_STORE);
1233
1234    if (mop & MO_BSWAP) {
1235        val = bswap128(val);
1236    }
1237    store_atom_16(cpu, ra, haddr, mop, val);
1238    clear_helper_retaddr();
1239}
1240
1241uint8_t cpu_ldb_code_mmu(CPUArchState *env, vaddr addr,
1242                         MemOpIdx oi, uintptr_t ra)
1243{
1244    return do_ld1_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1245}
1246
1247uint16_t cpu_ldw_code_mmu(CPUArchState *env, vaddr addr,
1248                          MemOpIdx oi, uintptr_t ra)
1249{
1250    return do_ld2_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1251}
1252
1253uint32_t cpu_ldl_code_mmu(CPUArchState *env, vaddr addr,
1254                          MemOpIdx oi, uintptr_t ra)
1255{
1256    return do_ld4_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1257}
1258
1259uint64_t cpu_ldq_code_mmu(CPUArchState *env, vaddr addr,
1260                          MemOpIdx oi, uintptr_t ra)
1261{
1262    return do_ld8_mmu(env_cpu(env), addr, oi, ra ? ra : 1, MMU_INST_FETCH);
1263}
1264
1265#include "ldst_common.c.inc"
1266
1267/*
1268 * Do not allow unaligned operations to proceed.  Return the host address.
1269 */
1270static void *atomic_mmu_lookup(CPUState *cpu, vaddr addr, MemOpIdx oi,
1271                               int size, uintptr_t retaddr)
1272{
1273    MemOp mop = get_memop(oi);
1274    int a_bits = memop_alignment_bits(mop);
1275    void *ret;
1276
1277    /* Enforce guest required alignment.  */
1278    if (unlikely(addr & ((1 << a_bits) - 1))) {
1279        cpu_loop_exit_sigbus(cpu, addr, MMU_DATA_STORE, retaddr);
1280    }
1281
1282    /* Enforce qemu required alignment.  */
1283    if (unlikely(addr & (size - 1))) {
1284        cpu_loop_exit_atomic(cpu, retaddr);
1285    }
1286
1287    ret = g2h(cpu, addr);
1288    set_helper_retaddr(retaddr);
1289    return ret;
1290}
1291
1292#include "atomic_common.c.inc"
1293
1294/*
1295 * First set of functions passes in OI and RETADDR.
1296 * This makes them callable from other helpers.
1297 */
1298
/*
 * Build the name of a generated helper by pasting together
 * cpu_atomic_<X>, SUFFIX, END and _mmu.
 * NOTE(review): SUFFIX and END are not defined here; presumably
 * atomic_template.h defines them per DATA_SIZE -- confirm.
 */
#define ATOMIC_NAME(X) \
    glue(glue(glue(cpu_atomic_ ## X, SUFFIX), END), _mmu)
/* Cleanup hook for the template code: drop the helper return address. */
#define ATOMIC_MMU_CLEANUP do { clear_helper_retaddr(); } while (0)

/*
 * Include the template once per operand size in bytes.  DATA_SIZE is
 * redefined before each inclusion without an #undef here, so the template
 * presumably #undefs it at its end -- TODO confirm.
 */
#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

/* 8-byte variants only when CONFIG_ATOMIC64 is defined. */
#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

/* 16-byte variants need CONFIG_ATOMIC128 or a non-zero HAVE_CMPXCHG128. */
#if defined(CONFIG_ATOMIC128) || HAVE_CMPXCHG128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif
1321