/*
 * Memory region management for Tiny Code Generator for QEMU.
 */
#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/madvise.h"
#include "qemu/mprotect.h"
#include "qemu/memalign.h"
#include "qemu/cacheinfo.h"
#include "qemu/qtree.h"
#include "qapi/error.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#include "tcg-internal.h"

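/*
 * One binary search tree of TranslationBlocks per region, keyed by the
 * host address of the translated code, each guarded by its own lock so
 * that lookups and insertions from different threads rarely contend.
 */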
struct tcg_region_tree {
    QemuMutex lock;
    QTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

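/*
 * The code_gen_buffer is divided into equally-sized "regions" that TCG
 * threads claim on demand: a thread translates into its current region
 * until the high-water mark is reached, then requests the next free one.
 * Each region is terminated by a guard page.
 */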
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start_aligned;
    void *after_prologue;
    size_t n;             /* number of regions */
    size_t size;          /* size of one region, excluding its guard page */
    size_t stride;        /* region size plus guard page */
    size_t total_size;    /* size of the entire buffer */

    /* fields protected by the lock */
    size_t current;       /* index of the next region to hand out */
    size_t agg_size_full; /* aggregate size of fully-used regions */
};

static struct tcg_region_state region;

/*
 * One struct tcg_region_tree per region, allocated as a single block and
 * padded out to @tree_size bytes each; indexed with pointer arithmetic
 * from @region_trees.
 */
static void *region_trees;
static size_t tree_size;

bool in_code_gen_buffer(const void *p)
{
    /*
     * Much like it is valid to have a pointer to the byte past the end
     * of an array (so long as you don't dereference it), allow a pointer
     * to the byte past the end of the code gen buffer.
     */
    return (size_t)(p - region.start_aligned) <= region.total_size;
}

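/*
 * Debug-build versions of the split-wx pointer conversions: translate
 * between the writable (rw) and executable (rx) views of the code buffer,
 * which differ by tcg_splitwx_diff, and assert that the result lies
 * within the code_gen_buffer.
 */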
#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */

/* Compare a pointer @ptr against a tb_tc descriptor @s. */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

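/*
 * Three-way comparison used as the QTree key function.  Keys are
 * struct tb_tc: real TBs carry a non-zero size, whereas lookup keys
 * built by tcg_tb_lookup() have size == 0 and match any TB whose
 * [ptr, ptr + size) range contains the pointer.
 */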
static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp, gpointer userdata)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, this is an insertion or removal of a real
     * TB, which is by far the most common case.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* Two real TBs at the same address must be the same TB. */
        g_assert(a->size == b->size);
        return 0;
    }

    /*
     * One of the two is a lookup key (size == 0).  The tree does not
     * guarantee which argument is the key, so handle either side.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}

static void tb_destroy(gpointer value)
{
    TranslationBlock *tb = value;
    qemu_spin_destroy(&tb->jmp_lock);
}

static void tcg_region_trees_init(void)
{
    size_t i;

    /* Pad each element to a full d-cache line to avoid false sharing. */
    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = q_tree_new_full(tb_tc_cmp, NULL, NULL, tb_destroy);
    }
}

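/*
 * Map a host code pointer to the tree covering its region.  The pointer
 * may reference either the rw or the rx view of the buffer; NULL is
 * returned if it lies in neither.
 */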
static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
{
    size_t region_idx;

    /*
     * Like tcg_splitwx_to_rw, but without the assert: the pointer may
     * come from a source outside our control, so fail gracefully.
     */
    if (!in_code_gen_buffer(p)) {
        p -= tcg_splitwx_diff;
        if (!in_code_gen_buffer(p)) {
            return NULL;
        }
    }

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    q_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    q_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

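/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size.
 * Return NULL if not found.
 */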
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    if (rt == NULL) {
        return NULL;
    }

    qemu_mutex_lock(&rt->lock);
    tb = q_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        q_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += q_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset. */
        q_tree_ref(rt->tree);
        q_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

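/*
 * Compute the [start, end) bounds of region @curr_region: the first
 * region starts after the prologue, and the last one extends to the end
 * of the buffer.
 */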
static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.after_prologue;
    }
    /* The final region may have a few extra pages due to earlier rounding. */
    if (curr_region == region.n - 1) {
        end = region.start_aligned + region.total_size;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        /* All regions have been handed out; fail the allocation. */
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

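/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */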
bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* Read the region size now; alloc__locked will overwrite it on success. */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

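/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */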
static void tcg_region_initial_alloc__locked(TCGContext *s)
{
    bool err = tcg_region_alloc__locked(s);
    g_assert(!err);
}

void tcg_region_initial_alloc(TCGContext *s)
{
    qemu_mutex_lock(&region.lock);
    tcg_region_initial_alloc__locked(s);
    qemu_mutex_unlock(&region.lock);
}

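/*
 * Reset all regions and hand the initial region back to each context.
 * Call only while no other thread can be translating, e.g. from a
 * safe-work context during a full tb_flush.
 */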
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        tcg_region_initial_alloc__locked(s);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

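/* Decide how many regions to divide the code_gen_buffer into. */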
static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
{
#ifdef CONFIG_USER_ONLY
    return 1;
#else
    size_t n_regions;

    /*
     * It is likely that some vCPUs will translate more code than others,
     * so we first try to set more regions than max_cpus, with those regions
     * being of reasonable size.  If that's not possible we make do by evenly
     * dividing the code_gen_buffer among the vCPUs.
     *
     * Use a single region if all we have is one vCPU thread.
     */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /*
     * Try to have more regions than max_cpus, with each region being >= 2 MB.
     * If we can't, then just allocate one region per vCPU thread.
     */
    n_regions = tb_size / (2 * MiB);
    if (n_regions <= max_cpus) {
        return max_cpus;
    }
    return MIN(n_regions, max_cpus * 8);
#endif
}

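/*
 * Minimum size of the code gen buffer.  This number is randomly chosen,
 * but not so small that we can't have a fair number of TBs live.
 *
 * The maximum, MAX_CODE_GEN_BUFFER_SIZE, comes from the TCG backend and
 * is typically limited by the range of the direct branches it emits for
 * goto_tb.
 */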
#define MIN_CODE_GEN_BUFFER_SIZE (1 * MiB)

#if TCG_TARGET_REG_BITS == 32
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32 * MiB)
#ifdef CONFIG_USER_ONLY
/*
 * For user mode on smaller 32 bit systems we may run into trouble
 * allocating big chunks of data in the right place.  On these systems
 * we use a static code buffer placed directly in the binary.
 */
#define USE_STATIC_CODE_GEN_BUFFER
#endif
#else /* TCG_TARGET_REG_BITS == 64 */
#ifdef CONFIG_USER_ONLY
/*
 * User-mode emulation typically runs many translator instances at once,
 * so keep the per-process default modest.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (128 * MiB)
#else
/*
 * System emulation usually runs one or two guests per host; users who
 * need more translation cache can raise this with the tb-size option.
 */
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (1 * GiB)
#endif
#endif

#define DEFAULT_CODE_GEN_BUFFER_SIZE \
  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)

#ifdef USE_STATIC_CODE_GEN_BUFFER
static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
    __attribute__((aligned(CODE_GEN_ALIGN)));

static int alloc_code_gen_buffer(size_t tb_size, int splitwx, Error **errp)
{
    void *buf, *end;
    size_t size;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    /* Page-align the beginning and end of the buffer. */
    buf = static_code_gen_buffer;
    end = static_code_gen_buffer + sizeof(static_code_gen_buffer);
    buf = QEMU_ALIGN_PTR_UP(buf, qemu_real_host_page_size());
    end = QEMU_ALIGN_PTR_DOWN(end, qemu_real_host_page_size());

    size = end - buf;

    /* Honor a command-line option limiting the size of the buffer. */
    if (size > tb_size) {
        size = QEMU_ALIGN_DOWN(tb_size, qemu_real_host_page_size());
    }

    region.start_aligned = buf;
    region.total_size = size;

    return PROT_READ | PROT_WRITE;
}
#elif defined(_WIN32)
static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    void *buf;

    if (splitwx > 0) {
        error_setg(errp, "jit split-wx not supported");
        return -1;
    }

    buf = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT,
                       PAGE_EXECUTE_READWRITE);
    if (buf == NULL) {
        error_setg_win32(errp, GetLastError(),
                         "allocate %zu bytes for jit buffer", size);
        return -1;
    }

    region.start_aligned = buf;
    region.total_size = size;

    return PAGE_READ | PAGE_WRITE | PAGE_EXEC;
}
#else
static int alloc_code_gen_buffer_anon(size_t size, int prot,
                                      int flags, Error **errp)
{
    void *buf;

    buf = mmap(NULL, size, prot, flags, -1, 0);
    if (buf == MAP_FAILED) {
        error_setg_errno(errp, errno,
                         "allocate %zu bytes for jit buffer", size);
        return -1;
    }

    region.start_aligned = buf;
    region.total_size = size;
    return prot;
}

#ifndef CONFIG_TCG_INTERPRETER
#ifdef CONFIG_POSIX
#include "qemu/memfd.h"

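/*
 * Split-wx via memfd: back the buffer with an anonymous file and map it
 * twice, once read-write (the view TCG writes through) and once
 * read-execute (the view the host CPU runs).  The two mappings differ
 * by tcg_splitwx_diff.
 */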
static int alloc_code_gen_buffer_splitwx_memfd(size_t size, Error **errp)
{
    void *buf_rw = NULL, *buf_rx = MAP_FAILED;
    int fd = -1;

    buf_rw = qemu_memfd_alloc("tcg-jit", size, 0, &fd, errp);
    if (buf_rw == NULL) {
        goto fail;
    }

    buf_rx = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
    if (buf_rx == MAP_FAILED) {
        goto fail_rx;
    }

    close(fd);
    region.start_aligned = buf_rw;
    region.total_size = size;
    tcg_splitwx_diff = buf_rx - buf_rw;

    return PROT_READ | PROT_WRITE;

 fail_rx:
    error_setg_errno(errp, errno, "failed to map shared memory for execute");
 fail:
    if (buf_rx != MAP_FAILED) {
        munmap(buf_rx, size);
    }
    if (buf_rw) {
        munmap(buf_rw, size);
    }
    if (fd >= 0) {
        close(fd);
    }
    return -1;
}
#endif /* CONFIG_POSIX */

#ifdef CONFIG_DARWIN
#include <mach/mach.h>

extern kern_return_t mach_vm_remap(vm_map_t target_task,
                                   mach_vm_address_t *target_address,
                                   mach_vm_size_t size,
                                   mach_vm_offset_t mask,
                                   int flags,
                                   vm_map_t src_task,
                                   mach_vm_address_t src_address,
                                   boolean_t copy,
                                   vm_prot_t *cur_protection,
                                   vm_prot_t *max_protection,
                                   vm_inherit_t inheritance);

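/*
 * Split-wx on Darwin: allocate the rw mapping anonymously, create a
 * second mapping of the same pages with mach_vm_remap(), and mprotect()
 * that alias to read-execute.
 */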
static int alloc_code_gen_buffer_splitwx_vmremap(size_t size, Error **errp)
{
    kern_return_t ret;
    mach_vm_address_t buf_rw, buf_rx;
    vm_prot_t cur_prot, max_prot;

    /* Map the read-write portion via normal anon memory. */
    if (alloc_code_gen_buffer_anon(size, PROT_READ | PROT_WRITE,
                                   MAP_PRIVATE | MAP_ANONYMOUS, errp) < 0) {
        return -1;
    }

    buf_rw = (mach_vm_address_t)region.start_aligned;
    buf_rx = 0;
    ret = mach_vm_remap(mach_task_self(),
                        &buf_rx,
                        size,
                        0,
                        VM_FLAGS_ANYWHERE,
                        mach_task_self(),
                        buf_rw,
                        false,
                        &cur_prot,
                        &max_prot,
                        VM_INHERIT_NONE);
    if (ret != KERN_SUCCESS) {
        /* "ret" is a Mach error code, not an errno; report a generic message. */
        error_setg(errp, "vm_remap for jit splitwx failed");
        munmap((void *)buf_rw, size);
        return -1;
    }

    if (mprotect((void *)buf_rx, size, PROT_READ | PROT_EXEC) != 0) {
        error_setg_errno(errp, errno, "mprotect for jit splitwx");
        munmap((void *)buf_rx, size);
        munmap((void *)buf_rw, size);
        return -1;
    }

    tcg_splitwx_diff = buf_rx - buf_rw;
    return PROT_READ | PROT_WRITE;
}
#endif /* CONFIG_DARWIN */
#endif /* !CONFIG_TCG_INTERPRETER */

static int alloc_code_gen_buffer_splitwx(size_t size, Error **errp)
{
#ifndef CONFIG_TCG_INTERPRETER
# ifdef CONFIG_DARWIN
    return alloc_code_gen_buffer_splitwx_vmremap(size, errp);
# endif
# ifdef CONFIG_POSIX
    return alloc_code_gen_buffer_splitwx_memfd(size, errp);
# endif
#endif
    error_setg(errp, "jit split-wx not supported");
    return -1;
}

static int alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
{
    ERRP_GUARD();
    int prot, flags;

    if (splitwx) {
        prot = alloc_code_gen_buffer_splitwx(size, errp);
        if (prot >= 0) {
            return prot;
        }
        /*
         * If splitwx force-on (1), fail;
         * if splitwx default-on (-1), fall through to splitwx off.
         */
        if (splitwx > 0) {
            return -1;
        }
        error_free_or_abort(errp);
    }

    /*
     * Begin with PROT_NONE: the usable part of each region is mprotect()ed
     * to its final permissions in tcg_region_init(), and the guard pages
     * then need no further change.
     */
    prot = PROT_NONE;
    flags = MAP_PRIVATE | MAP_ANONYMOUS;
#ifdef CONFIG_DARWIN
    /* Without split-wx, request MAP_JIT so the buffer may be made executable. */
    if (!splitwx) {
        flags |= MAP_JIT;
    }
#endif

    return alloc_code_gen_buffer_anon(size, prot, flags, errp);
}
#endif

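/*
 * Initializes region partitioning.  Called once at startup, before any
 * code has been translated.
 *
 * The buffer is divided into region.n regions of equal stride, each
 * ending in an inaccessible guard page; region 0 additionally loses the
 * space taken by the prologue (see tcg_region_prologue_set).  With a
 * single TCG context (user mode, or !MTTCG) there is only one region;
 * with MTTCG each vCPU thread claims regions on demand, which keeps
 * full flushes rare even when some threads generate far more code than
 * others.
 */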
void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    const size_t page_size = qemu_real_host_page_size();
    size_t region_size;
    int have_prot, need_prot;

    /* Size the buffer: default to 1/8 of physical memory, within bounds. */
    if (tb_size == 0) {
        size_t phys_mem = qemu_get_host_physmem();
        if (phys_mem == 0) {
            tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
        } else {
            tb_size = QEMU_ALIGN_DOWN(phys_mem / 8, page_size);
            tb_size = MIN(DEFAULT_CODE_GEN_BUFFER_SIZE, tb_size);
        }
    }
    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
    }
    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
    }

    have_prot = alloc_code_gen_buffer(tb_size, splitwx, &error_fatal);
    assert(have_prot >= 0);

    /* Request huge pages for the buffer and, if present, the rx mapping. */
    qemu_madvise(region.start_aligned, region.total_size, QEMU_MADV_HUGEPAGE);
    if (tcg_splitwx_diff) {
        qemu_madvise(region.start_aligned + tcg_splitwx_diff,
                     region.total_size, QEMU_MADV_HUGEPAGE);
    }

    /*
     * Make region_size a multiple of page_size.  Any pages left over by
     * the rounding are assigned to the last region (see tcg_region_bounds).
     */
    region.n = tcg_n_regions(tb_size, max_cpus);
    region_size = tb_size / region.n;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard. */
    g_assert(region_size >= 2 * page_size);
    region.stride = region_size;

    /* Reserve space for guard pages. */
    region.size = region_size - page_size;
    region.total_size -= page_size;

    /*
     * The first region will be smaller than the others, because it will
     * start after the prologue; that has not been generated yet, so for
     * now the first region begins at the start of the buffer.
     */
    region.after_prologue = region.start_aligned;

    qemu_mutex_init(&region.lock);

    /*
     * Set the final protections: read-write for the rw view, plus exec
     * when there is no separate rx view.  The PAGE_{READ,WRITE,EXEC} bits
     * have the same values as PROT_{READ,WRITE,EXEC}, so have_prot can be
     * compared directly.  Guard pages are set only in the rw view, which
     * is the one that could be overrun.
     */
    need_prot = PAGE_READ | PAGE_WRITE;
#ifndef CONFIG_TCG_INTERPRETER
    if (tcg_splitwx_diff == 0) {
        need_prot |= PAGE_EXEC;
    }
#endif
    for (size_t i = 0, n = region.n; i < n; i++) {
        void *start, *end;

        tcg_region_bounds(i, &start, &end);
        if (have_prot != need_prot) {
            int rc;

            if (need_prot == (PAGE_READ | PAGE_WRITE | PAGE_EXEC)) {
                rc = qemu_mprotect_rwx(start, end - start);
            } else if (need_prot == (PAGE_READ | PAGE_WRITE)) {
                rc = qemu_mprotect_rw(start, end - start);
            } else {
                g_assert_not_reached();
            }
            if (rc) {
                error_setg_errno(&error_fatal, errno,
                                 "mprotect of jit buffer");
            }
        }
        if (have_prot != 0) {
            /* Guard pages are nice for bug detection but are not essential. */
            (void)qemu_mprotect_none(end, page_size);
        }
    }

    tcg_region_trees_init();

    /*
     * Leave the initial context initialized to the first region.
     * This will be the context into which we generate the prologue.
     * It is also the only context for CONFIG_USER_ONLY.
     */
    tcg_region_initial_alloc__locked(&tcg_init_ctx);
}

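/*
 * Record the end of the prologue, which was generated at the start of
 * region 0, and shrink that region accordingly.  The remainder of the
 * buffer is then registered via tcg_register_jit() for debugger support.
 */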
void tcg_region_prologue_set(TCGContext *s)
{
    /* Deduct the prologue from the first region. */
    g_assert(region.start_aligned == s->code_gen_buffer);
    region.after_prologue = s->code_ptr;

    /* Recompute boundaries of the first region. */
    tcg_region_assign(s, 0);

    /* Register the balance of the buffer with gdb. */
    tcg_register_jit(tcg_splitwx_to_rx(region.after_prologue),
                     region.start_aligned + region.total_size -
                     region.after_prologue);
}

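/*
 * Returns the size (in bytes) of all translated code (from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity().
 */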
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

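/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. of all
 * regions, excluding guard pages and each region's high-water reserve.
 * See also: tcg_code_size().
 */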
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* No need for synchronization; these variables are set at init time. */
    guard_size = region.stride - region.size;
    capacity = region.total_size;
    capacity -= (region.n - 1) * guard_size;
    capacity -= region.n * TCG_HIGHWATER;

    return capacity;
}