qemu/accel/tcg/tb-maint.c
   1/*
   2 * Translation Block Maintenance
   3 *
   4 *  Copyright (c) 2003 Fabrice Bellard
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 *
  11 * This library is distributed in the hope that it will be useful,
  12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 * Lesser General Public License for more details.
  15 *
  16 * You should have received a copy of the GNU Lesser General Public
  17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18 */
  19
  20#include "qemu/osdep.h"
  21#include "qemu/interval-tree.h"
  22#include "qemu/qtree.h"
  23#include "exec/cputlb.h"
  24#include "exec/log.h"
  25#include "exec/exec-all.h"
  26#include "exec/tb-flush.h"
  27#include "exec/translate-all.h"
  28#include "sysemu/tcg.h"
  29#include "tcg/tcg.h"
  30#include "tb-hash.h"
  31#include "tb-context.h"
  32#include "internal.h"
  33
  34
  35/* List iterators for lists of tagged pointers in TranslationBlock. */
  36#define TB_FOR_EACH_TAGGED(head, tb, n, field)                          \
  37    for (n = (head) & 1, tb = (TranslationBlock *)((head) & ~1);        \
  38         tb; tb = (TranslationBlock *)tb->field[n], n = (uintptr_t)tb & 1, \
  39             tb = (TranslationBlock *)((uintptr_t)tb & ~1))
  40
  41#define TB_FOR_EACH_JMP(head_tb, tb, n)                                 \
  42    TB_FOR_EACH_TAGGED((head_tb)->jmp_list_head, tb, n, jmp_list_next)
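
/*
 * Illustrative sketch (not part of the build): one step of the tagged list
 * that TB_FOR_EACH_JMP walks.  Each stored word is a TranslationBlock
 * pointer with the slot index (0 or 1) folded into its low bit:
 *
 *     uintptr_t ent = head_tb->jmp_list_head;
 *     unsigned n = ent & 1;                       -- which of the two slots
 *     TranslationBlock *tb = (TranslationBlock *)(ent & ~1);
 *
 * The next link is then tb->jmp_list_next[n], tagged the same way.
 */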
  43
  44static bool tb_cmp(const void *ap, const void *bp)
  45{
  46    const TranslationBlock *a = ap;
  47    const TranslationBlock *b = bp;
  48
  49    return ((tb_cflags(a) & CF_PCREL || a->pc == b->pc) &&
  50            a->cs_base == b->cs_base &&
  51            a->flags == b->flags &&
  52            (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
  53            tb_page_addr0(a) == tb_page_addr0(b) &&
  54            tb_page_addr1(a) == tb_page_addr1(b));
  55}
  56
  57void tb_htable_init(void)
  58{
  59    unsigned int mode = QHT_MODE_AUTO_RESIZE;
  60
  61    qht_init(&tb_ctx.htable, tb_cmp, CODE_GEN_HTABLE_SIZE, mode);
  62}
  63
  64typedef struct PageDesc PageDesc;
  65
  66#ifdef CONFIG_USER_ONLY
  67
  68/*
  69 * In user-mode page locks aren't used; mmap_lock is enough.
  70 */
  71#define assert_page_locked(pd) tcg_debug_assert(have_mmap_lock())
  72
  73static inline void tb_lock_pages(const TranslationBlock *tb) { }
  74
  75/*
  76 * For user-only, since we are protecting all of memory with a single lock,
  77 * and because the two pages of a TranslationBlock are always contiguous,
  78 * use a single data structure to record all TranslationBlocks.
  79 */
  80static IntervalTreeRoot tb_root;
  81
  82static void tb_remove_all(void)
  83{
  84    assert_memory_lock();
  85    memset(&tb_root, 0, sizeof(tb_root));
  86}
  87
  88/* Call with mmap_lock held. */
  89static void tb_record(TranslationBlock *tb)
  90{
  91    vaddr addr;
  92    int flags;
  93
  94    assert_memory_lock();
  95    tb->itree.last = tb->itree.start + tb->size - 1;
  96
  97    /* translator_loop() must have made all TB pages non-writable */
  98    addr = tb_page_addr0(tb);
  99    flags = page_get_flags(addr);
 100    assert(!(flags & PAGE_WRITE));
 101
 102    addr = tb_page_addr1(tb);
 103    if (addr != -1) {
 104        flags = page_get_flags(addr);
 105        assert(!(flags & PAGE_WRITE));
 106    }
 107
 108    interval_tree_insert(&tb->itree, &tb_root);
 109}
 110
 111/* Call with mmap_lock held. */
 112static void tb_remove(TranslationBlock *tb)
 113{
 114    assert_memory_lock();
 115    interval_tree_remove(&tb->itree, &tb_root);
 116}
 117
 118/* TODO: For now, still shared with translate-all.c for system mode. */
 119#define PAGE_FOR_EACH_TB(start, last, pagedesc, T, N)   \
 120    for (T = foreach_tb_first(start, last),             \
 121         N = foreach_tb_next(T, start, last);           \
 122         T != NULL;                                     \
 123         T = N, N = foreach_tb_next(N, start, last))
 124
 125typedef TranslationBlock *PageForEachNext;
 126
 127static PageForEachNext foreach_tb_first(tb_page_addr_t start,
 128                                        tb_page_addr_t last)
 129{
 130    IntervalTreeNode *n = interval_tree_iter_first(&tb_root, start, last);
 131    return n ? container_of(n, TranslationBlock, itree) : NULL;
 132}
 133
 134static PageForEachNext foreach_tb_next(PageForEachNext tb,
 135                                       tb_page_addr_t start,
 136                                       tb_page_addr_t last)
 137{
 138    IntervalTreeNode *n;
 139
 140    if (tb) {
 141        n = interval_tree_iter_next(&tb->itree, start, last);
 142        if (n) {
 143            return container_of(n, TranslationBlock, itree);
 144        }
 145    }
 146    return NULL;
 147}
 148
 149#else
 150/*
 151 * In system mode we want L1_MAP to be based on ram offsets.
 152 */
 153#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
 154# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
 155#else
 156# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
 157#endif
 158
 159/* Size of the L2 (and L3, etc) page tables.  */
 160#define V_L2_BITS 10
 161#define V_L2_SIZE (1 << V_L2_BITS)
 162
 163/*
 164 * L1 Mapping properties
 165 */
 166static int v_l1_size;
 167static int v_l1_shift;
 168static int v_l2_levels;
 169
 170/*
 171 * The bottom level has pointers to PageDesc, and is indexed by
 172 * anything from 4 to (V_L2_BITS + 3) bits, depending on target page size.
 173 */
 174#define V_L1_MIN_BITS 4
 175#define V_L1_MAX_BITS (V_L2_BITS + 3)
 176#define V_L1_MAX_SIZE (1 << V_L1_MAX_BITS)
 177
 178static void *l1_map[V_L1_MAX_SIZE];
 179
 180struct PageDesc {
 181    QemuSpin lock;
 182    /* list of TBs intersecting this ram page */
 183    uintptr_t first_tb;
 184};
 185
 186void page_table_config_init(void)
 187{
 188    uint32_t v_l1_bits;
 189
 190    assert(TARGET_PAGE_BITS);
 191    /* The bits remaining after N lower levels of page tables.  */
 192    v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
 193    if (v_l1_bits < V_L1_MIN_BITS) {
 194        v_l1_bits += V_L2_BITS;
 195    }
 196
 197    v_l1_size = 1 << v_l1_bits;
 198    v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
 199    v_l2_levels = v_l1_shift / V_L2_BITS - 1;
 200
 201    assert(v_l1_bits <= V_L1_MAX_BITS);
 202    assert(v_l1_shift % V_L2_BITS == 0);
 203    assert(v_l2_levels >= 0);
 204}
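
/*
 * Worked example (illustrative only, assuming L1_MAP_ADDR_SPACE_BITS == 64
 * and TARGET_PAGE_BITS == 12):
 *
 *     64 - 12 = 52 page-index bits; 52 % 10 = 2 < V_L1_MIN_BITS, so
 *     v_l1_bits   = 2 + 10 = 12  ->  v_l1_size = 4096
 *     v_l1_shift  = 52 - 12 = 40
 *     v_l2_levels = 40 / 10 - 1 = 3
 *
 * i.e. three intermediate levels of V_L2_SIZE pointers, with the last ten
 * index bits selecting a PageDesc inside the leaf block.
 */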
 205
 206static PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
 207{
 208    PageDesc *pd;
 209    void **lp;
 210    int i;
 211
 212    /* Level 1.  Always allocated.  */
 213    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));
 214
 215    /* Level 2..N-1.  */
 216    for (i = v_l2_levels; i > 0; i--) {
 217        void **p = qatomic_rcu_read(lp);
 218
 219        if (p == NULL) {
 220            void *existing;
 221
 222            if (!alloc) {
 223                return NULL;
 224            }
 225            p = g_new0(void *, V_L2_SIZE);
 226            existing = qatomic_cmpxchg(lp, NULL, p);
 227            if (unlikely(existing)) {
 228                g_free(p);
 229                p = existing;
 230            }
 231        }
 232
 233        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
 234    }
 235
 236    pd = qatomic_rcu_read(lp);
 237    if (pd == NULL) {
 238        void *existing;
 239
 240        if (!alloc) {
 241            return NULL;
 242        }
 243
 244        pd = g_new0(PageDesc, V_L2_SIZE);
 245        for (int i = 0; i < V_L2_SIZE; i++) {
 246            qemu_spin_init(&pd[i].lock);
 247        }
 248
 249        existing = qatomic_cmpxchg(lp, NULL, pd);
 250        if (unlikely(existing)) {
 251            for (int i = 0; i < V_L2_SIZE; i++) {
 252                qemu_spin_destroy(&pd[i].lock);
 253            }
 254            g_free(pd);
 255            pd = existing;
 256        }
 257    }
 258
 259    return pd + (index & (V_L2_SIZE - 1));
 260}
 261
 262static inline PageDesc *page_find(tb_page_addr_t index)
 263{
 264    return page_find_alloc(index, false);
 265}
 266
 267/**
 268 * struct page_entry - page descriptor entry
 269 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 270 * @index:  page index of the page
 271 * @locked: whether the page is locked
 272 *
 273 * This struct helps us keep track of the locked state of a page, without
 274 * bloating &struct PageDesc.
 275 *
 276 * A page lock protects accesses to all fields of &struct PageDesc.
 277 *
 278 * See also: &struct page_collection.
 279 */
 280struct page_entry {
 281    PageDesc *pd;
 282    tb_page_addr_t index;
 283    bool locked;
 284};
 285
 286/**
 287 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 288 * @tree:   Binary search tree (BST) of the pages, with key == page index
 289 * @max:    Pointer to the page in @tree with the highest page index
 290 *
 291 * To avoid deadlock we lock pages in ascending order of page index.
 292 * When operating on a set of pages, we need to keep track of them so that
 293 * we can lock them in order and also unlock them later. For this we collect
 294 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 295 * @tree implementation we use does not provide an O(1) operation to obtain the
 296 * highest-ranked element, we use @max to keep track of the inserted page
 297 * with the highest index. This is valuable because if a page is not in
 298 * the tree and its index is higher than @max's, then we can lock it
 299 * without breaking the locking order rule.
 300 *
 301 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 302 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 303 *
 304 * See also: page_collection_lock().
 305 */
 306struct page_collection {
 307    QTree *tree;
 308    struct page_entry *max;
 309};
 310
 311typedef int PageForEachNext;
 312#define PAGE_FOR_EACH_TB(start, last, pagedesc, tb, n) \
 313    TB_FOR_EACH_TAGGED((pagedesc)->first_tb, tb, n, page_next)
 314
 315#ifdef CONFIG_DEBUG_TCG
 316
 317static __thread GHashTable *ht_pages_locked_debug;
 318
 319static void ht_pages_locked_debug_init(void)
 320{
 321    if (ht_pages_locked_debug) {
 322        return;
 323    }
 324    ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
 325}
 326
 327static bool page_is_locked(const PageDesc *pd)
 328{
 329    PageDesc *found;
 330
 331    ht_pages_locked_debug_init();
 332    found = g_hash_table_lookup(ht_pages_locked_debug, pd);
 333    return !!found;
 334}
 335
 336static void page_lock__debug(PageDesc *pd)
 337{
 338    ht_pages_locked_debug_init();
 339    g_assert(!page_is_locked(pd));
 340    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
 341}
 342
 343static void page_unlock__debug(const PageDesc *pd)
 344{
 345    bool removed;
 346
 347    ht_pages_locked_debug_init();
 348    g_assert(page_is_locked(pd));
 349    removed = g_hash_table_remove(ht_pages_locked_debug, pd);
 350    g_assert(removed);
 351}
 352
 353static void do_assert_page_locked(const PageDesc *pd,
 354                                  const char *file, int line)
 355{
 356    if (unlikely(!page_is_locked(pd))) {
 357        error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
 358                     pd, file, line);
 359        abort();
 360    }
 361}
 362#define assert_page_locked(pd) do_assert_page_locked(pd, __FILE__, __LINE__)
 363
 364void assert_no_pages_locked(void)
 365{
 366    ht_pages_locked_debug_init();
 367    g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
 368}
 369
 370#else /* !CONFIG_DEBUG_TCG */
 371
 372static inline void page_lock__debug(const PageDesc *pd) { }
 373static inline void page_unlock__debug(const PageDesc *pd) { }
 374static inline void assert_page_locked(const PageDesc *pd) { }
 375
 376#endif /* CONFIG_DEBUG_TCG */
 377
 378static void page_lock(PageDesc *pd)
 379{
 380    page_lock__debug(pd);
 381    qemu_spin_lock(&pd->lock);
 382}
 383
 384/* Like qemu_spin_trylock, returns false on success */
 385static bool page_trylock(PageDesc *pd)
 386{
 387    bool busy = qemu_spin_trylock(&pd->lock);
 388    if (!busy) {
 389        page_lock__debug(pd);
 390    }
 391    return busy;
 392}
 393
 394static void page_unlock(PageDesc *pd)
 395{
 396    qemu_spin_unlock(&pd->lock);
 397    page_unlock__debug(pd);
 398}
 399
 400void tb_lock_page0(tb_page_addr_t paddr)
 401{
 402    page_lock(page_find_alloc(paddr >> TARGET_PAGE_BITS, true));
 403}
 404
 405void tb_lock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
 406{
 407    tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
 408    tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
 409    PageDesc *pd0, *pd1;
 410
 411    if (pindex0 == pindex1) {
 412        /* Identical pages, and the first page is already locked. */
 413        return;
 414    }
 415
 416    pd1 = page_find_alloc(pindex1, true);
 417    if (pindex0 < pindex1) {
 418        /* Correct locking order, we may block. */
 419        page_lock(pd1);
 420        return;
 421    }
 422
 423    /* Incorrect locking order, we cannot block lest we deadlock. */
 424    if (!page_trylock(pd1)) {
 425        return;
 426    }
 427
 428    /*
 429     * Drop the lock on page0 and get both page locks in the right order.
 430     * Restart translation via longjmp.
 431     */
 432    pd0 = page_find_alloc(pindex0, false);
 433    page_unlock(pd0);
 434    page_lock(pd1);
 435    page_lock(pd0);
 436    siglongjmp(tcg_ctx->jmp_trans, -3);
 437}
 438
 439void tb_unlock_page1(tb_page_addr_t paddr0, tb_page_addr_t paddr1)
 440{
 441    tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
 442    tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
 443
 444    if (pindex0 != pindex1) {
 445        page_unlock(page_find_alloc(pindex1, false));
 446    }
 447}
 448
 449static void tb_lock_pages(TranslationBlock *tb)
 450{
 451    tb_page_addr_t paddr0 = tb_page_addr0(tb);
 452    tb_page_addr_t paddr1 = tb_page_addr1(tb);
 453    tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
 454    tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
 455
 456    if (unlikely(paddr0 == -1)) {
 457        return;
 458    }
 459    if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
 460        if (pindex0 < pindex1) {
 461            page_lock(page_find_alloc(pindex0, true));
 462            page_lock(page_find_alloc(pindex1, true));
 463            return;
 464        }
 465        page_lock(page_find_alloc(pindex1, true));
 466    }
 467    page_lock(page_find_alloc(pindex0, true));
 468}
 469
 470void tb_unlock_pages(TranslationBlock *tb)
 471{
 472    tb_page_addr_t paddr0 = tb_page_addr0(tb);
 473    tb_page_addr_t paddr1 = tb_page_addr1(tb);
 474    tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
 475    tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
 476
 477    if (unlikely(paddr0 == -1)) {
 478        return;
 479    }
 480    if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
 481        page_unlock(page_find_alloc(pindex1, false));
 482    }
 483    page_unlock(page_find_alloc(pindex0, false));
 484}
 485
 486static inline struct page_entry *
 487page_entry_new(PageDesc *pd, tb_page_addr_t index)
 488{
 489    struct page_entry *pe = g_malloc(sizeof(*pe));
 490
 491    pe->index = index;
 492    pe->pd = pd;
 493    pe->locked = false;
 494    return pe;
 495}
 496
 497static void page_entry_destroy(gpointer p)
 498{
 499    struct page_entry *pe = p;
 500
 501    g_assert(pe->locked);
 502    page_unlock(pe->pd);
 503    g_free(pe);
 504}
 505
 506/* returns false on success */
 507static bool page_entry_trylock(struct page_entry *pe)
 508{
 509    bool busy = page_trylock(pe->pd);
 510    if (!busy) {
 511        g_assert(!pe->locked);
 512        pe->locked = true;
 513    }
 514    return busy;
 515}
 516
 517static void do_page_entry_lock(struct page_entry *pe)
 518{
 519    page_lock(pe->pd);
 520    g_assert(!pe->locked);
 521    pe->locked = true;
 522}
 523
 524static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
 525{
 526    struct page_entry *pe = value;
 527
 528    do_page_entry_lock(pe);
 529    return FALSE;
 530}
 531
 532static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
 533{
 534    struct page_entry *pe = value;
 535
 536    if (pe->locked) {
 537        pe->locked = false;
 538        page_unlock(pe->pd);
 539    }
 540    return FALSE;
 541}
 542
 543/*
 544 * Trylock a page, and if successful, add the page to a collection.
 545 * Returns true ("busy") if the page could not be locked; false otherwise.
 546 */
 547static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
 548{
 549    tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
 550    struct page_entry *pe;
 551    PageDesc *pd;
 552
 553    pe = q_tree_lookup(set->tree, &index);
 554    if (pe) {
 555        return false;
 556    }
 557
 558    pd = page_find(index);
 559    if (pd == NULL) {
 560        return false;
 561    }
 562
 563    pe = page_entry_new(pd, index);
 564    q_tree_insert(set->tree, &pe->index, pe);
 565
 566    /*
 567     * If this is either (1) the first insertion or (2) a page whose index
 568     * is higher than any other so far, just lock the page and move on.
 569     */
 570    if (set->max == NULL || pe->index > set->max->index) {
 571        set->max = pe;
 572        do_page_entry_lock(pe);
 573        return false;
 574    }
 575    /*
 576     * Try to acquire out-of-order lock; if busy, return busy so that we acquire
 577     * locks in order.
 578     */
 579    return page_entry_trylock(pe);
 580}
 581
 582static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
 583{
 584    tb_page_addr_t a = *(const tb_page_addr_t *)ap;
 585    tb_page_addr_t b = *(const tb_page_addr_t *)bp;
 586
 587    if (a == b) {
 588        return 0;
 589    } else if (a < b) {
 590        return -1;
 591    }
 592    return 1;
 593}
 594
 595/*
 596 * Lock a range of pages ([@start,@last]) as well as the pages of all
 597 * intersecting TBs.
 598 * Locking order: acquire locks in ascending order of page index.
 599 */
 600static struct page_collection *page_collection_lock(tb_page_addr_t start,
 601                                                    tb_page_addr_t last)
 602{
 603    struct page_collection *set = g_malloc(sizeof(*set));
 604    tb_page_addr_t index;
 605    PageDesc *pd;
 606
 607    start >>= TARGET_PAGE_BITS;
 608    last >>= TARGET_PAGE_BITS;
 609    g_assert(start <= last);
 610
 611    set->tree = q_tree_new_full(tb_page_addr_cmp, NULL, NULL,
 612                                page_entry_destroy);
 613    set->max = NULL;
 614    assert_no_pages_locked();
 615
 616 retry:
 617    q_tree_foreach(set->tree, page_entry_lock, NULL);
 618
 619    for (index = start; index <= last; index++) {
 620        TranslationBlock *tb;
 621        PageForEachNext n;
 622
 623        pd = page_find(index);
 624        if (pd == NULL) {
 625            continue;
 626        }
 627        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
 628            q_tree_foreach(set->tree, page_entry_unlock, NULL);
 629            goto retry;
 630        }
 631        assert_page_locked(pd);
 632        PAGE_FOR_EACH_TB(unused, unused, pd, tb, n) {
 633            if (page_trylock_add(set, tb_page_addr0(tb)) ||
 634                (tb_page_addr1(tb) != -1 &&
 635                 page_trylock_add(set, tb_page_addr1(tb)))) {
 636                /* drop all locks, and reacquire in order */
 637                q_tree_foreach(set->tree, page_entry_unlock, NULL);
 638                goto retry;
 639            }
 640        }
 641    }
 642    return set;
 643}
 644
 645static void page_collection_unlock(struct page_collection *set)
 646{
 647    /* entries are unlocked and freed via page_entry_destroy */
 648    q_tree_destroy(set->tree);
 649    g_free(set);
 650}
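
/*
 * Illustrative usage (mirrors tb_invalidate_phys_page() below; caller shape
 * is assumed): take every page lock covering a physical range, invalidate
 * under those locks, then release all locks and entries in one call:
 *
 *     struct page_collection *pages = page_collection_lock(start, last);
 *     ... walk the PageDescs and call tb_phys_invalidate__locked() ...
 *     page_collection_unlock(pages);
 */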
 651
 652/* Set to NULL all the 'first_tb' fields in all PageDescs. */
 653static void tb_remove_all_1(int level, void **lp)
 654{
 655    int i;
 656
 657    if (*lp == NULL) {
 658        return;
 659    }
 660    if (level == 0) {
 661        PageDesc *pd = *lp;
 662
 663        for (i = 0; i < V_L2_SIZE; ++i) {
 664            page_lock(&pd[i]);
 665            pd[i].first_tb = (uintptr_t)NULL;
 666            page_unlock(&pd[i]);
 667        }
 668    } else {
 669        void **pp = *lp;
 670
 671        for (i = 0; i < V_L2_SIZE; ++i) {
 672            tb_remove_all_1(level - 1, pp + i);
 673        }
 674    }
 675}
 676
 677static void tb_remove_all(void)
 678{
 679    int i, l1_sz = v_l1_size;
 680
 681    for (i = 0; i < l1_sz; i++) {
 682        tb_remove_all_1(v_l2_levels, l1_map + i);
 683    }
 684}
 685
 686/*
 687 * Add the tb to the target page and protect it if necessary.
 688 * Called with @p->lock held.
 689 */
 690static void tb_page_add(PageDesc *p, TranslationBlock *tb, unsigned int n)
 691{
 692    bool page_already_protected;
 693
 694    assert_page_locked(p);
 695
 696    tb->page_next[n] = p->first_tb;
 697    page_already_protected = p->first_tb != 0;
 698    p->first_tb = (uintptr_t)tb | n;
 699
 700    /*
 701     * If some code is already present, then the pages are already
 702     * protected.  We therefore only need to protect the page when the
 703     * first TB is added to it.
 704     */
 705    if (!page_already_protected) {
 706        tlb_protect_code(tb->page_addr[n] & TARGET_PAGE_MASK);
 707    }
 708}
 709
 710static void tb_record(TranslationBlock *tb)
 711{
 712    tb_page_addr_t paddr0 = tb_page_addr0(tb);
 713    tb_page_addr_t paddr1 = tb_page_addr1(tb);
 714    tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
 715    tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
 716
 717    assert(paddr0 != -1);
 718    if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
 719        tb_page_add(page_find_alloc(pindex1, false), tb, 1);
 720    }
 721    tb_page_add(page_find_alloc(pindex0, false), tb, 0);
 722}
 723
 724static void tb_page_remove(PageDesc *pd, TranslationBlock *tb)
 725{
 726    TranslationBlock *tb1;
 727    uintptr_t *pprev;
 728    PageForEachNext n1;
 729
 730    assert_page_locked(pd);
 731    pprev = &pd->first_tb;
 732    PAGE_FOR_EACH_TB(unused, unused, pd, tb1, n1) {
 733        if (tb1 == tb) {
 734            *pprev = tb1->page_next[n1];
 735            return;
 736        }
 737        pprev = &tb1->page_next[n1];
 738    }
 739    g_assert_not_reached();
 740}
 741
 742static void tb_remove(TranslationBlock *tb)
 743{
 744    tb_page_addr_t paddr0 = tb_page_addr0(tb);
 745    tb_page_addr_t paddr1 = tb_page_addr1(tb);
 746    tb_page_addr_t pindex0 = paddr0 >> TARGET_PAGE_BITS;
 747    tb_page_addr_t pindex1 = paddr1 >> TARGET_PAGE_BITS;
 748
 749    assert(paddr0 != -1);
 750    if (unlikely(paddr1 != -1) && pindex0 != pindex1) {
 751        tb_page_remove(page_find_alloc(pindex1, false), tb);
 752    }
 753    tb_page_remove(page_find_alloc(pindex0, false), tb);
 754}
 755#endif /* CONFIG_USER_ONLY */
 756
 757/* flush all the translation blocks */
 758static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
 759{
 760    bool did_flush = false;
 761
 762    mmap_lock();
 763    /* If it has already been done on request of another CPU, just skip it. */
 764    if (tb_ctx.tb_flush_count != tb_flush_count.host_int) {
 765        goto done;
 766    }
 767    did_flush = true;
 768
 769    CPU_FOREACH(cpu) {
 770        tcg_flush_jmp_cache(cpu);
 771    }
 772
 773    qht_reset_size(&tb_ctx.htable, CODE_GEN_HTABLE_SIZE);
 774    tb_remove_all();
 775
 776    tcg_region_reset_all();
 777    /* XXX: flush processor icache at this point if cache flush is expensive */
 778    qatomic_inc(&tb_ctx.tb_flush_count);
 779
 780done:
 781    mmap_unlock();
 782    if (did_flush) {
 783        qemu_plugin_flush_cb();
 784    }
 785}
 786
 787void tb_flush(CPUState *cpu)
 788{
 789    if (tcg_enabled()) {
 790        unsigned tb_flush_count = qatomic_read(&tb_ctx.tb_flush_count);
 791
 792        if (cpu_in_serial_context(cpu)) {
 793            do_tb_flush(cpu, RUN_ON_CPU_HOST_INT(tb_flush_count));
 794        } else {
 795            async_safe_run_on_cpu(cpu, do_tb_flush,
 796                                  RUN_ON_CPU_HOST_INT(tb_flush_count));
 797        }
 798    }
 799}
 800
 801/* remove @orig from its @n_orig-th jump list */
 802static inline void tb_remove_from_jmp_list(TranslationBlock *orig, int n_orig)
 803{
 804    uintptr_t ptr, ptr_locked;
 805    TranslationBlock *dest;
 806    TranslationBlock *tb;
 807    uintptr_t *pprev;
 808    int n;
 809
 810    /* mark the LSB of jmp_dest[] so that no further jumps can be inserted */
 811    ptr = qatomic_or_fetch(&orig->jmp_dest[n_orig], 1);
 812    dest = (TranslationBlock *)(ptr & ~1);
 813    if (dest == NULL) {
 814        return;
 815    }
 816
 817    qemu_spin_lock(&dest->jmp_lock);
 818    /*
 819     * While acquiring the lock, the jump might have been removed if the
 820     * destination TB was invalidated; check again.
 821     */
 822    ptr_locked = qatomic_read(&orig->jmp_dest[n_orig]);
 823    if (ptr_locked != ptr) {
 824        qemu_spin_unlock(&dest->jmp_lock);
 825        /*
 826         * The only possibility is that the jump was unlinked via
 827         * tb_jmp_unlink(dest). Seeing any other destination here would be a bug,
 828         * because we set the LSB above.
 829         */
 830        g_assert(ptr_locked == 1 && dest->cflags & CF_INVALID);
 831        return;
 832    }
 833    /*
 834     * We first acquired the lock, and since the destination pointer matches,
 835     * we know for sure that @orig is in the jmp list.
 836     */
 837    pprev = &dest->jmp_list_head;
 838    TB_FOR_EACH_JMP(dest, tb, n) {
 839        if (tb == orig && n == n_orig) {
 840            *pprev = tb->jmp_list_next[n];
 841            /* no need to set orig->jmp_dest[n]; setting the LSB was enough */
 842            qemu_spin_unlock(&dest->jmp_lock);
 843            return;
 844        }
 845        pprev = &tb->jmp_list_next[n];
 846    }
 847    g_assert_not_reached();
 848}
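
/*
 * Illustrative recap of the jmp_dest[] protocol above (sketch only):
 *
 *     uintptr_t ptr = qatomic_or_fetch(&orig->jmp_dest[n], 1);
 *     TranslationBlock *dest = (TranslationBlock *)(ptr & ~1);
 *
 * With the low bit set, no new outgoing jump can be installed in this slot.
 * A NULL dest means the slot was never linked; re-reading the word as 1
 * after taking dest->jmp_lock means tb_jmp_unlink(dest) unlinked us first.
 */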
 849
 850/*
 851 * Reset the jump entry 'n' of a TB so that it is not chained to another TB.
 852 */
 853void tb_reset_jump(TranslationBlock *tb, int n)
 854{
 855    uintptr_t addr = (uintptr_t)(tb->tc.ptr + tb->jmp_reset_offset[n]);
 856    tb_set_jmp_target(tb, n, addr);
 857}
 858
 859/* remove any jumps to the TB */
 860static inline void tb_jmp_unlink(TranslationBlock *dest)
 861{
 862    TranslationBlock *tb;
 863    int n;
 864
 865    qemu_spin_lock(&dest->jmp_lock);
 866
 867    TB_FOR_EACH_JMP(dest, tb, n) {
 868        tb_reset_jump(tb, n);
 869        qatomic_and(&tb->jmp_dest[n], (uintptr_t)NULL | 1);
 870        /* No need to clear the list entry; setting the dest ptr is enough */
 871    }
 872    dest->jmp_list_head = (uintptr_t)NULL;
 873
 874    qemu_spin_unlock(&dest->jmp_lock);
 875}
 876
 877static void tb_jmp_cache_inval_tb(TranslationBlock *tb)
 878{
 879    CPUState *cpu;
 880
 881    if (tb_cflags(tb) & CF_PCREL) {
 882        /* A TB may be at any virtual address */
 883        CPU_FOREACH(cpu) {
 884            tcg_flush_jmp_cache(cpu);
 885        }
 886    } else {
 887        uint32_t h = tb_jmp_cache_hash_func(tb->pc);
 888
 889        CPU_FOREACH(cpu) {
 890            CPUJumpCache *jc = cpu->tb_jmp_cache;
 891
 892            if (qatomic_read(&jc->array[h].tb) == tb) {
 893                qatomic_set(&jc->array[h].tb, NULL);
 894            }
 895        }
 896    }
 897}
 898
 899/*
 900 * In user-mode, call with mmap_lock held.
 901 * In !user-mode, if @rm_from_page_list is set, call with the TB's pages'
 902 * locks held.
 903 */
 904static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
 905{
 906    uint32_t h;
 907    tb_page_addr_t phys_pc;
 908    uint32_t orig_cflags = tb_cflags(tb);
 909
 910    assert_memory_lock();
 911
 912    /* make sure no further incoming jumps will be chained to this TB */
 913    qemu_spin_lock(&tb->jmp_lock);
 914    qatomic_set(&tb->cflags, tb->cflags | CF_INVALID);
 915    qemu_spin_unlock(&tb->jmp_lock);
 916
 917    /* remove the TB from the hash list */
 918    phys_pc = tb_page_addr0(tb);
 919    h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
 920                     tb->flags, tb->cs_base, orig_cflags);
 921    if (!qht_remove(&tb_ctx.htable, tb, h)) {
 922        return;
 923    }
 924
 925    /* remove the TB from the page list */
 926    if (rm_from_page_list) {
 927        tb_remove(tb);
 928    }
 929
 930    /* remove the TB from the per-CPU jump caches */
 931    tb_jmp_cache_inval_tb(tb);
 932
 933    /* suppress this TB from the two jump lists */
 934    tb_remove_from_jmp_list(tb, 0);
 935    tb_remove_from_jmp_list(tb, 1);
 936
 937    /* suppress any remaining jumps to this TB */
 938    tb_jmp_unlink(tb);
 939
 940    qatomic_set(&tb_ctx.tb_phys_invalidate_count,
 941                tb_ctx.tb_phys_invalidate_count + 1);
 942}
 943
 944static void tb_phys_invalidate__locked(TranslationBlock *tb)
 945{
 946    qemu_thread_jit_write();
 947    do_tb_phys_invalidate(tb, true);
 948    qemu_thread_jit_execute();
 949}
 950
 951/*
 952 * Invalidate one TB.
 953 * Called with mmap_lock held in user-mode.
 954 */
 955void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
 956{
 957    if (page_addr == -1 && tb_page_addr0(tb) != -1) {
 958        tb_lock_pages(tb);
 959        do_tb_phys_invalidate(tb, true);
 960        tb_unlock_pages(tb);
 961    } else {
 962        do_tb_phys_invalidate(tb, false);
 963    }
 964}
 965
 966/*
 967 * Add a new TB and link it to the physical page tables.
 968 * Called with mmap_lock held for user-mode emulation.
 969 *
 970 * Returns @tb, or a pointer to an existing TB that matches @tb.
 971 * Note that in !user-mode, another thread might have already added a TB
 972 * for the same block of guest code that @tb corresponds to. In that case,
 973 * the caller should discard the original @tb, and use instead the returned TB.
 974 */
 975TranslationBlock *tb_link_page(TranslationBlock *tb)
 976{
 977    void *existing_tb = NULL;
 978    uint32_t h;
 979
 980    assert_memory_lock();
 981    tcg_debug_assert(!(tb->cflags & CF_INVALID));
 982
 983    tb_record(tb);
 984
 985    /* add in the hash table */
 986    h = tb_hash_func(tb_page_addr0(tb), (tb->cflags & CF_PCREL ? 0 : tb->pc),
 987                     tb->flags, tb->cs_base, tb->cflags);
 988    qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
 989
 990    /* remove TB from the page(s) if we couldn't insert it */
 991    if (unlikely(existing_tb)) {
 992        tb_remove(tb);
 993        tb_unlock_pages(tb);
 994        return existing_tb;
 995    }
 996
 997    tb_unlock_pages(tb);
 998    return tb;
 999}
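
/*
 * Illustrative caller pattern (assumed shape of the translation path, not
 * taken from this file): a freshly generated TB must be replaced with
 * whatever tb_link_page() returns, since another thread may have won the
 * race to insert an equivalent TB:
 *
 *     TranslationBlock *existing = tb_link_page(tb);
 *     if (existing != tb) {
 *         tb = existing;   -- discard the speculative translation
 *     }
 */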
1000
1001#ifdef CONFIG_USER_ONLY
1002/*
1003 * Invalidate all TBs which intersect with the target address range.
1004 * Called with mmap_lock held for user-mode emulation.
1005 * NOTE: this function must not be called while a TB is running.
1006 */
1007void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
1008{
1009    TranslationBlock *tb;
1010    PageForEachNext n;
1011
1012    assert_memory_lock();
1013
1014    PAGE_FOR_EACH_TB(start, last, unused, tb, n) {
1015        tb_phys_invalidate__locked(tb);
1016    }
1017}
1018
1019/*
1020 * Invalidate all TBs which intersect with the target address page @addr.
1021 * Called with mmap_lock held for user-mode emulation
1022 * NOTE: this function must not be called while a TB is running.
1023 */
1024void tb_invalidate_phys_page(tb_page_addr_t addr)
1025{
1026    tb_page_addr_t start, last;
1027
1028    start = addr & TARGET_PAGE_MASK;
1029    last = addr | ~TARGET_PAGE_MASK;
1030    tb_invalidate_phys_range(start, last);
1031}
1032
1033/*
1034 * Called with mmap_lock held. If pc is not 0 then it indicates the
1035 * host PC of the faulting store instruction that caused this invalidate.
1036 * Returns true if the caller needs to abort execution of the current
1037 * TB (because it was modified by this store and the guest CPU has
1038 * precise-SMC semantics).
1039 */
1040bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc)
1041{
1042    TranslationBlock *current_tb;
1043    bool current_tb_modified;
1044    TranslationBlock *tb;
1045    PageForEachNext n;
1046    tb_page_addr_t last;
1047
1048    /*
1049     * Without precise SMC semantics, or when outside of a TB,
1050     * we can skip straight to invalidation.
1051     */
1052#ifndef TARGET_HAS_PRECISE_SMC
1053    pc = 0;
1054#endif
1055    if (!pc) {
1056        tb_invalidate_phys_page(addr);
1057        return false;
1058    }
1059
1060    assert_memory_lock();
1061    current_tb = tcg_tb_lookup(pc);
1062
1063    last = addr | ~TARGET_PAGE_MASK;
1064    addr &= TARGET_PAGE_MASK;
1065    current_tb_modified = false;
1066
1067    PAGE_FOR_EACH_TB(addr, last, unused, tb, n) {
1068        if (current_tb == tb &&
1069            (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1070            /*
1071             * If we are modifying the current TB, we must stop its
1072             * execution. We could be more precise by checking that
1073             * the modification is after the current PC, but it would
1074             * require a specialized function to partially restore
1075             * the CPU state.
1076             */
1077            current_tb_modified = true;
1078            cpu_restore_state_from_tb(current_cpu, current_tb, pc);
1079        }
1080        tb_phys_invalidate__locked(tb);
1081    }
1082
1083    if (current_tb_modified) {
1084        /* Force execution of one insn next time.  */
1085        CPUState *cpu = current_cpu;
1086        cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1087        return true;
1088    }
1089    return false;
1090}
1091#else
1092/*
1093 * @p must be non-NULL.
1094 * Call with all @pages locked.
1095 */
1096static void
1097tb_invalidate_phys_page_range__locked(struct page_collection *pages,
1098                                      PageDesc *p, tb_page_addr_t start,
1099                                      tb_page_addr_t last,
1100                                      uintptr_t retaddr)
1101{
1102    TranslationBlock *tb;
1103    PageForEachNext n;
1104#ifdef TARGET_HAS_PRECISE_SMC
1105    bool current_tb_modified = false;
1106    TranslationBlock *current_tb = retaddr ? tcg_tb_lookup(retaddr) : NULL;
1107#endif /* TARGET_HAS_PRECISE_SMC */
1108
1109    /* Range may not cross a page. */
1110    tcg_debug_assert(((start ^ last) & TARGET_PAGE_MASK) == 0);
1111
1112    /*
1113     * We remove all the TBs in the range [start, last].
1114     * XXX: see if in some cases it could be faster to invalidate all the code
1115     */
1116    PAGE_FOR_EACH_TB(start, last, p, tb, n) {
1117        tb_page_addr_t tb_start, tb_last;
1118
1119        /* NOTE: this is subtle as a TB may span two physical pages */
1120        tb_start = tb_page_addr0(tb);
1121        tb_last = tb_start + tb->size - 1;
1122        if (n == 0) {
1123            tb_last = MIN(tb_last, tb_start | ~TARGET_PAGE_MASK);
1124        } else {
1125            tb_start = tb_page_addr1(tb);
1126            tb_last = tb_start + (tb_last & ~TARGET_PAGE_MASK);
1127        }
1128        if (!(tb_last < start || tb_start > last)) {
1129#ifdef TARGET_HAS_PRECISE_SMC
1130            if (current_tb == tb &&
1131                (tb_cflags(current_tb) & CF_COUNT_MASK) != 1) {
1132                /*
1133                 * If we are modifying the current TB, we must stop
1134                 * its execution. We could be more precise by checking
1135                 * that the modification is after the current PC, but it
1136                 * would require a specialized function to partially
1137                 * restore the CPU state.
1138                 */
1139                current_tb_modified = true;
1140                cpu_restore_state_from_tb(current_cpu, current_tb, retaddr);
1141            }
1142#endif /* TARGET_HAS_PRECISE_SMC */
1143            tb_phys_invalidate__locked(tb);
1144        }
1145    }
1146
1147    /* if no code remaining, no need to continue to use slow writes */
1148    if (!p->first_tb) {
1149        tlb_unprotect_code(start);
1150    }
1151
1152#ifdef TARGET_HAS_PRECISE_SMC
1153    if (current_tb_modified) {
1154        page_collection_unlock(pages);
1155        /* Force execution of one insn next time.  */
1156        current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu);
1157        mmap_unlock();
1158        cpu_loop_exit_noexc(current_cpu);
1159    }
1160#endif
1161}
1162
1163/*
1164 * Invalidate all TBs which intersect with the target physical
1165 * address page @addr.
1166 */
1167void tb_invalidate_phys_page(tb_page_addr_t addr)
1168{
1169    struct page_collection *pages;
1170    tb_page_addr_t start, last;
1171    PageDesc *p;
1172
1173    p = page_find(addr >> TARGET_PAGE_BITS);
1174    if (p == NULL) {
1175        return;
1176    }
1177
1178    start = addr & TARGET_PAGE_MASK;
1179    last = addr | ~TARGET_PAGE_MASK;
1180    pages = page_collection_lock(start, last);
1181    tb_invalidate_phys_page_range__locked(pages, p, start, last, 0);
1182    page_collection_unlock(pages);
1183}
1184
1185/*
1186 * Invalidate all TBs which intersect with the target physical address range
1187 * [@start, @last]. NOTE: @start and @last may refer to *different*
1188 * physical pages.
1191 */
1192void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t last)
1193{
1194    struct page_collection *pages;
1195    tb_page_addr_t index, index_last;
1196
1197    pages = page_collection_lock(start, last);
1198
1199    index_last = last >> TARGET_PAGE_BITS;
1200    for (index = start >> TARGET_PAGE_BITS; index <= index_last; index++) {
1201        PageDesc *pd = page_find(index);
1202        tb_page_addr_t page_start, page_last;
1203
1204        if (pd == NULL) {
1205            continue;
1206        }
1207        assert_page_locked(pd);
1208        page_start = index << TARGET_PAGE_BITS;
1209        page_last = page_start | ~TARGET_PAGE_MASK;
1210        page_last = MIN(page_last, last);
1211        tb_invalidate_phys_page_range__locked(pages, pd,
1212                                              page_start, page_last, 0);
1213    }
1214    page_collection_unlock(pages);
1215}
1216
1217/*
1218 * Call with all @pages in the range [@start, @start + len) locked.
1219 */
1220static void tb_invalidate_phys_page_fast__locked(struct page_collection *pages,
1221                                                 tb_page_addr_t start,
1222                                                 unsigned len, uintptr_t ra)
1223{
1224    PageDesc *p;
1225
1226    p = page_find(start >> TARGET_PAGE_BITS);
1227    if (!p) {
1228        return;
1229    }
1230
1231    assert_page_locked(p);
1232    tb_invalidate_phys_page_range__locked(pages, p, start, start + len - 1, ra);
1233}
1234
1235/*
1236 * @size must be <= 8 and @ram_addr must be a multiple of @size.
1237 * Called from the softmmu slow path when code areas are written to with
1238 * the iothread mutex not held.
1239 */
1240void tb_invalidate_phys_range_fast(ram_addr_t ram_addr,
1241                                   unsigned size,
1242                                   uintptr_t retaddr)
1243{
1244    struct page_collection *pages;
1245
1246    pages = page_collection_lock(ram_addr, ram_addr + size - 1);
1247    tb_invalidate_phys_page_fast__locked(pages, ram_addr, size, retaddr);
1248    page_collection_unlock(pages);
1249}
1250
1251#endif /* CONFIG_USER_ONLY */
1252