qemu/tcg/tcg.c
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#include "tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
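
/*
 * Illustrative sketch (not part of the original file): each host backend
 * provides its own CIE contents in its tcg-target.inc.c; the values below
 * are placeholders, not any real target's encoding.
 *
 *     static const DebugFrameHeader debug_frame = {
 *         .cie.len = sizeof(DebugFrameCIE) - 4,  // length excludes .len
 *         .cie.id = -1,                          // marks this as a CIE
 *         .cie.version = 1,
 *         .cie.code_align = 1,
 *         .cie.data_align = 0x78,                // sleb128 -8, placeholder
 *         .cie.return_column = 30,               // placeholder register
 *     };
 *
 * The FDE's func_start/func_len are filled in at registration time, once
 * the address and size of the generated-code buffer are known.
 */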

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.inc.c. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static bool tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
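
/*
 * Usage sketch (not from this file): a backend whose tcg_insn_unit is
 * 4 bytes emits one instruction word at a time with tcg_out32(), e.g.
 *
 *     static void tcg_out_nop_example(TCGContext *s)   // hypothetical helper
 *     {
 *         tcg_out32(s, 0xd503201f);                    // AArch64 NOP encoding
 *     }
 *
 * The memcpy() fallback paths above keep the same helpers usable on
 * byte-stream targets such as i386, where TCG_TARGET_INSN_UNIT_SIZE is 1.
 */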

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r;

    if (l->has_value) {
        /* FIXME: This may break relocations on RISC targets that
           modify instruction fields in place.  The caller may not have
           written the initial value.  */
        patch_reloc(code_ptr, type, l->u.value, addend);
    } else {
        /* add a new relocation entry */
        r = tcg_malloc(sizeof(TCGRelocation));
        r->type = type;
        r->ptr = code_ptr;
        r->addend = addend;
        r->next = l->u.first_reloc;
        l->u.first_reloc = r;
    }
}

static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
{
    intptr_t value = (intptr_t)ptr;
    TCGRelocation *r;

    tcg_debug_assert(!l->has_value);

    for (r = l->u.first_reloc; r != NULL; r = r->next) {
        patch_reloc(r->ptr, r->type, value, r->addend);
    }

    l->has_value = 1;
    l->u.value_ptr = ptr;
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    *l = (TCGLabel){
        .id = s->nb_labels++
    };

    return l;
}
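
/*
 * Typical front-end usage of labels (sketch, using the tcg-op.h helpers):
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_gen_brcondi_i32(TCG_COND_EQ, val, 0, l);   // forward branch
 *     ...                                            // "val != 0" path
 *     gen_set_label(l);
 *
 * While the branch is being encoded, tcg_out_reloc() queues a relocation
 * on the still-unbound label; tcg_out_label() later patches every queued
 * site once the label's address is known.
 */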

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    size_t off = tcg_current_code_size(s);
    s->tb_jmp_reset_offset[which] = off;
    /* Make sure that we didn't overflow the stored offset.  */
    assert(s->tb_jmp_reset_offset[which] == off);
}

#include "tcg-target.inc.c"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * In a lookup, exactly one of the two .size fields is set to 0.
     * From the glib sources we see that @ap is always the lookup key. However
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}
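
/*
 * Worked example: suppose a TB with tc.ptr == 0x1000 and tc.size == 0x80
 * is in the tree, and we look up the key { .ptr = 0x1040, .size = 0 }.
 * The key's size is 0, so tb_tc_cmp() calls ptr_cmp_tb_tc(0x1040, node);
 * 0x1040 lies within [0x1000, 0x1080), the comparator returns 0, and the
 * lookup matches even though 0x1040 != tc.ptr.  This is what lets
 * tcg_tb_lookup() map any host PC inside a TB back to that TB.
 */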

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
{
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = atomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
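
/*
 * Worked example for the softmmu case: with max_cpus == 8 and a 1 GiB
 * code_gen_buffer, the first iteration (i == 8) yields 64 regions of
 * 16 MiB each; 16 MiB >= 2 MiB, so tcg_n_regions() returns 64.  With an
 * 8 MiB buffer not even i == 1 reaches the 2 MiB threshold (8 MiB / 8
 * == 1 MiB), so we fall back to one region per vCPU thread, i.e. 8.
 */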

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
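
/*
 * Resulting layout (sketch), where G is a guard page of page_size bytes
 * and region.stride == region.size + page_size:
 *
 *   buf      aligned
 *   |<-head->|<---- region.size ---->|G|<---- region.size ---->|G| ...
 *            |<----- region.stride ---->|
 *
 * The unaligned head [buf, aligned) is folded into region 0, and any
 * page-aligned tail is folded into the last region, whose guard page is
 * the one deducted from region.end above.
 */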

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = atomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < max_cpus);
    atomic_set(&tcg_ctxs[n], s);

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
        size_t size;

        size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);

        total += atomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
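
/*
 * The fast path lives in tcg.h as an inline wrapper; a sketch from memory
 * (see tcg.h for the authoritative version) that shows how the fields set
 * above are consumed:
 *
 *     static inline void *tcg_malloc(int size)
 *     {
 *         TCGContext *s = tcg_ctx;
 *         uint8_t *ptr = s->pool_cur;
 *         uint8_t *ptr_end = ptr + ROUND_UP(size, sizeof(long));
 *
 *         if (unlikely(ptr_end > s->pool_end)) {
 *             return tcg_malloc_internal(tcg_ctx, size);  // refill chunk
 *         }
 *         s->pool_cur = ptr_end;   // bump-pointer allocation
 *         return ptr;
 *     }
 */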

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    atomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
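
/*
 * Worked example: with qemu_icache_linesize == 64 and code_gen_ptr at
 * 0x1010, the TB descriptor is placed at 0x1040, and 'next' (the start
 * of the translated code) is the next 64-byte boundary after the
 * descriptor, so the TB structure and its host code never share a line.
 */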

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;
    s->code_gen_prologue = buf0;

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        bool ok = tcg_out_pool_finalize(s);
        tcg_debug_assert(ok);
    }
#endif

    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(s->code_gen_buffer, total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock();
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(s->code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
}

static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->temp_global = 1;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
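
/*
 * Worked example for the 32-bit-host split above: a 64-bit global "foo"
 * at offset 0x10 becomes two I32 halves, with "foo_0" the TCG low part
 * and "foo_1" the high part.  On a big-endian host foo_0 gets
 * mem_offset 0x14 and foo_1 gets 0x10; on a little-endian host the
 * offsets are 0x10 and 0x14, matching the value's memory layout.
 */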

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_global == 0);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}
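
/*
 * Usage sketch for the constructors above (dest/src are the caller's
 * temps, hypothetical here): a front end can write
 *
 *     TCGv_i32 four = tcg_const_i32(4);
 *     tcg_gen_add_i32(dest, src, four);
 *     tcg_temp_free_i32(four);
 *
 * instead of allocating a temp and loading the immediate by hand.  The
 * _local variants yield temps whose values survive across branches and
 * labels within a TB.
 */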
1329
1330#if defined(CONFIG_DEBUG_TCG)
1331void tcg_clear_temp_count(void)
1332{
1333    TCGContext *s = tcg_ctx;
1334    s->temps_in_use = 0;
1335}
1336
1337int tcg_check_temp_count(void)
1338{
1339    TCGContext *s = tcg_ctx;
1340    if (s->temps_in_use) {
1341        /* Clear the count so that we don't give another
1342         * warning immediately next time around.
1343         */
1344        s->temps_in_use = 0;
1345        return 1;
1346    }
1347    return 0;
1348}
1349#endif
1350
1351/* Return true if OP may appear in the opcode stream.
1352   Test the runtime variable that controls each opcode.  */
1353bool tcg_op_supported(TCGOpcode op)
1354{
1355    const bool have_vec
1356        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1357
1358    switch (op) {
1359    case INDEX_op_discard:
1360    case INDEX_op_set_label:
1361    case INDEX_op_call:
1362    case INDEX_op_br:
1363    case INDEX_op_mb:
1364    case INDEX_op_insn_start:
1365    case INDEX_op_exit_tb:
1366    case INDEX_op_goto_tb:
1367    case INDEX_op_qemu_ld_i32:
1368    case INDEX_op_qemu_st_i32:
1369    case INDEX_op_qemu_ld_i64:
1370    case INDEX_op_qemu_st_i64:
1371        return true;
1372
1373    case INDEX_op_goto_ptr:
1374        return TCG_TARGET_HAS_goto_ptr;
1375
1376    case INDEX_op_mov_i32:
1377    case INDEX_op_movi_i32:
1378    case INDEX_op_setcond_i32:
1379    case INDEX_op_brcond_i32:
1380    case INDEX_op_ld8u_i32:
1381    case INDEX_op_ld8s_i32:
1382    case INDEX_op_ld16u_i32:
1383    case INDEX_op_ld16s_i32:
1384    case INDEX_op_ld_i32:
1385    case INDEX_op_st8_i32:
1386    case INDEX_op_st16_i32:
1387    case INDEX_op_st_i32:
1388    case INDEX_op_add_i32:
1389    case INDEX_op_sub_i32:
1390    case INDEX_op_mul_i32:
1391    case INDEX_op_and_i32:
1392    case INDEX_op_or_i32:
1393    case INDEX_op_xor_i32:
1394    case INDEX_op_shl_i32:
1395    case INDEX_op_shr_i32:
1396    case INDEX_op_sar_i32:
1397        return true;
1398
1399    case INDEX_op_movcond_i32:
1400        return TCG_TARGET_HAS_movcond_i32;
1401    case INDEX_op_div_i32:
1402    case INDEX_op_divu_i32:
1403        return TCG_TARGET_HAS_div_i32;
1404    case INDEX_op_rem_i32:
1405    case INDEX_op_remu_i32:
1406        return TCG_TARGET_HAS_rem_i32;
1407    case INDEX_op_div2_i32:
1408    case INDEX_op_divu2_i32:
1409        return TCG_TARGET_HAS_div2_i32;
1410    case INDEX_op_rotl_i32:
1411    case INDEX_op_rotr_i32:
1412        return TCG_TARGET_HAS_rot_i32;
1413    case INDEX_op_deposit_i32:
1414        return TCG_TARGET_HAS_deposit_i32;
1415    case INDEX_op_extract_i32:
1416        return TCG_TARGET_HAS_extract_i32;
1417    case INDEX_op_sextract_i32:
1418        return TCG_TARGET_HAS_sextract_i32;
1419    case INDEX_op_add2_i32:
1420        return TCG_TARGET_HAS_add2_i32;
1421    case INDEX_op_sub2_i32:
1422        return TCG_TARGET_HAS_sub2_i32;
1423    case INDEX_op_mulu2_i32:
1424        return TCG_TARGET_HAS_mulu2_i32;
1425    case INDEX_op_muls2_i32:
1426        return TCG_TARGET_HAS_muls2_i32;
1427    case INDEX_op_muluh_i32:
1428        return TCG_TARGET_HAS_muluh_i32;
1429    case INDEX_op_mulsh_i32:
1430        return TCG_TARGET_HAS_mulsh_i32;
1431    case INDEX_op_ext8s_i32:
1432        return TCG_TARGET_HAS_ext8s_i32;
1433    case INDEX_op_ext16s_i32:
1434        return TCG_TARGET_HAS_ext16s_i32;
1435    case INDEX_op_ext8u_i32:
1436        return TCG_TARGET_HAS_ext8u_i32;
1437    case INDEX_op_ext16u_i32:
1438        return TCG_TARGET_HAS_ext16u_i32;
1439    case INDEX_op_bswap16_i32:
1440        return TCG_TARGET_HAS_bswap16_i32;
1441    case INDEX_op_bswap32_i32:
1442        return TCG_TARGET_HAS_bswap32_i32;
1443    case INDEX_op_not_i32:
1444        return TCG_TARGET_HAS_not_i32;
1445    case INDEX_op_neg_i32:
1446        return TCG_TARGET_HAS_neg_i32;
1447    case INDEX_op_andc_i32:
1448        return TCG_TARGET_HAS_andc_i32;
1449    case INDEX_op_orc_i32:
1450        return TCG_TARGET_HAS_orc_i32;
1451    case INDEX_op_eqv_i32:
1452        return TCG_TARGET_HAS_eqv_i32;
1453    case INDEX_op_nand_i32:
1454        return TCG_TARGET_HAS_nand_i32;
1455    case INDEX_op_nor_i32:
1456        return TCG_TARGET_HAS_nor_i32;
1457    case INDEX_op_clz_i32:
1458        return TCG_TARGET_HAS_clz_i32;
1459    case INDEX_op_ctz_i32:
1460        return TCG_TARGET_HAS_ctz_i32;
1461    case INDEX_op_ctpop_i32:
1462        return TCG_TARGET_HAS_ctpop_i32;
1463
1464    case INDEX_op_brcond2_i32:
1465    case INDEX_op_setcond2_i32:
1466        return TCG_TARGET_REG_BITS == 32;
1467
1468    case INDEX_op_mov_i64:
1469    case INDEX_op_movi_i64:
1470    case INDEX_op_setcond_i64:
1471    case INDEX_op_brcond_i64:
1472    case INDEX_op_ld8u_i64:
1473    case INDEX_op_ld8s_i64:
1474    case INDEX_op_ld16u_i64:
1475    case INDEX_op_ld16s_i64:
1476    case INDEX_op_ld32u_i64:
1477    case INDEX_op_ld32s_i64:
1478    case INDEX_op_ld_i64:
1479    case INDEX_op_st8_i64:
1480    case INDEX_op_st16_i64:
1481    case INDEX_op_st32_i64:
1482    case INDEX_op_st_i64:
1483    case INDEX_op_add_i64:
1484    case INDEX_op_sub_i64:
1485    case INDEX_op_mul_i64:
1486    case INDEX_op_and_i64:
1487    case INDEX_op_or_i64:
1488    case INDEX_op_xor_i64:
1489    case INDEX_op_shl_i64:
1490    case INDEX_op_shr_i64:
1491    case INDEX_op_sar_i64:
1492    case INDEX_op_ext_i32_i64:
1493    case INDEX_op_extu_i32_i64:
1494        return TCG_TARGET_REG_BITS == 64;
1495
1496    case INDEX_op_movcond_i64:
1497        return TCG_TARGET_HAS_movcond_i64;
1498    case INDEX_op_div_i64:
1499    case INDEX_op_divu_i64:
1500        return TCG_TARGET_HAS_div_i64;
1501    case INDEX_op_rem_i64:
1502    case INDEX_op_remu_i64:
1503        return TCG_TARGET_HAS_rem_i64;
1504    case INDEX_op_div2_i64:
1505    case INDEX_op_divu2_i64:
1506        return TCG_TARGET_HAS_div2_i64;
1507    case INDEX_op_rotl_i64:
1508    case INDEX_op_rotr_i64:
1509        return TCG_TARGET_HAS_rot_i64;
1510    case INDEX_op_deposit_i64:
1511        return TCG_TARGET_HAS_deposit_i64;
1512    case INDEX_op_extract_i64:
1513        return TCG_TARGET_HAS_extract_i64;
1514    case INDEX_op_sextract_i64:
1515        return TCG_TARGET_HAS_sextract_i64;
1516    case INDEX_op_extrl_i64_i32:
1517        return TCG_TARGET_HAS_extrl_i64_i32;
1518    case INDEX_op_extrh_i64_i32:
1519        return TCG_TARGET_HAS_extrh_i64_i32;
1520    case INDEX_op_ext8s_i64:
1521        return TCG_TARGET_HAS_ext8s_i64;
1522    case INDEX_op_ext16s_i64:
1523        return TCG_TARGET_HAS_ext16s_i64;
1524    case INDEX_op_ext32s_i64:
1525        return TCG_TARGET_HAS_ext32s_i64;
1526    case INDEX_op_ext8u_i64:
1527        return TCG_TARGET_HAS_ext8u_i64;
1528    case INDEX_op_ext16u_i64:
1529        return TCG_TARGET_HAS_ext16u_i64;
1530    case INDEX_op_ext32u_i64:
1531        return TCG_TARGET_HAS_ext32u_i64;
1532    case INDEX_op_bswap16_i64:
1533        return TCG_TARGET_HAS_bswap16_i64;
1534    case INDEX_op_bswap32_i64:
1535        return TCG_TARGET_HAS_bswap32_i64;
1536    case INDEX_op_bswap64_i64:
1537        return TCG_TARGET_HAS_bswap64_i64;
1538    case INDEX_op_not_i64:
1539        return TCG_TARGET_HAS_not_i64;
1540    case INDEX_op_neg_i64:
1541        return TCG_TARGET_HAS_neg_i64;
1542    case INDEX_op_andc_i64:
1543        return TCG_TARGET_HAS_andc_i64;
1544    case INDEX_op_orc_i64:
1545        return TCG_TARGET_HAS_orc_i64;
1546    case INDEX_op_eqv_i64:
1547        return TCG_TARGET_HAS_eqv_i64;
1548    case INDEX_op_nand_i64:
1549        return TCG_TARGET_HAS_nand_i64;
1550    case INDEX_op_nor_i64:
1551        return TCG_TARGET_HAS_nor_i64;
1552    case INDEX_op_clz_i64:
1553        return TCG_TARGET_HAS_clz_i64;
1554    case INDEX_op_ctz_i64:
1555        return TCG_TARGET_HAS_ctz_i64;
1556    case INDEX_op_ctpop_i64:
1557        return TCG_TARGET_HAS_ctpop_i64;
1558    case INDEX_op_add2_i64:
1559        return TCG_TARGET_HAS_add2_i64;
1560    case INDEX_op_sub2_i64:
1561        return TCG_TARGET_HAS_sub2_i64;
1562    case INDEX_op_mulu2_i64:
1563        return TCG_TARGET_HAS_mulu2_i64;
1564    case INDEX_op_muls2_i64:
1565        return TCG_TARGET_HAS_muls2_i64;
1566    case INDEX_op_muluh_i64:
1567        return TCG_TARGET_HAS_muluh_i64;
1568    case INDEX_op_mulsh_i64:
1569        return TCG_TARGET_HAS_mulsh_i64;
1570
1571    case INDEX_op_mov_vec:
1572    case INDEX_op_dup_vec:
1573    case INDEX_op_dupi_vec:
1574    case INDEX_op_ld_vec:
1575    case INDEX_op_st_vec:
1576    case INDEX_op_add_vec:
1577    case INDEX_op_sub_vec:
1578    case INDEX_op_and_vec:
1579    case INDEX_op_or_vec:
1580    case INDEX_op_xor_vec:
1581    case INDEX_op_cmp_vec:
1582        return have_vec;
1583    case INDEX_op_dup2_vec:
1584        return have_vec && TCG_TARGET_REG_BITS == 32;
1585    case INDEX_op_not_vec:
1586        return have_vec && TCG_TARGET_HAS_not_vec;
1587    case INDEX_op_neg_vec:
1588        return have_vec && TCG_TARGET_HAS_neg_vec;
1589    case INDEX_op_andc_vec:
1590        return have_vec && TCG_TARGET_HAS_andc_vec;
1591    case INDEX_op_orc_vec:
1592        return have_vec && TCG_TARGET_HAS_orc_vec;
1593    case INDEX_op_mul_vec:
1594        return have_vec && TCG_TARGET_HAS_mul_vec;
1595    case INDEX_op_shli_vec:
1596    case INDEX_op_shri_vec:
1597    case INDEX_op_sari_vec:
1598        return have_vec && TCG_TARGET_HAS_shi_vec;
1599    case INDEX_op_shls_vec:
1600    case INDEX_op_shrs_vec:
1601    case INDEX_op_sars_vec:
1602        return have_vec && TCG_TARGET_HAS_shs_vec;
1603    case INDEX_op_shlv_vec:
1604    case INDEX_op_shrv_vec:
1605    case INDEX_op_sarv_vec:
1606        return have_vec && TCG_TARGET_HAS_shv_vec;
1607
1608    default:
1609        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1610        return true;
1611    }
1612}
1613
1614/* Note: we convert the 64 bit args to 32 bit and do some alignment
1615   and endian swap. Maybe it would be better to do the alignment
1616   and endian swap in tcg_reg_alloc_call(). */
1617void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1618{
1619    int i, real_args, nb_rets, pi;
1620    unsigned sizemask, flags;
1621    TCGHelperInfo *info;
1622    TCGOp *op;
1623
1624    info = g_hash_table_lookup(helper_table, (gpointer)func);
1625    flags = info->flags;
1626    sizemask = info->sizemask;
1627
1628#if defined(__sparc__) && !defined(__arch64__) \
1629    && !defined(CONFIG_TCG_INTERPRETER)
1630    /* We have 64-bit values in one register, but need to pass as two
1631       separate parameters.  Split them.  */
1632    int orig_sizemask = sizemask;
1633    int orig_nargs = nargs;
1634    TCGv_i64 retl, reth;
1635    TCGTemp *split_args[MAX_OPC_PARAM];
1636
1637    retl = NULL;
1638    reth = NULL;
1639    if (sizemask != 0) {
1640        for (i = real_args = 0; i < nargs; ++i) {
1641            int is_64bit = sizemask & (1 << (i+1)*2);
1642            if (is_64bit) {
1643                TCGv_i64 orig = temp_tcgv_i64(args[i]);
1644                TCGv_i32 h = tcg_temp_new_i32();
1645                TCGv_i32 l = tcg_temp_new_i32();
1646                tcg_gen_extr_i64_i32(l, h, orig);
1647                split_args[real_args++] = tcgv_i32_temp(h);
1648                split_args[real_args++] = tcgv_i32_temp(l);
1649            } else {
1650                split_args[real_args++] = args[i];
1651            }
1652        }
1653        nargs = real_args;
1654        args = split_args;
1655        sizemask = 0;
1656    }
1657#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1658    for (i = 0; i < nargs; ++i) {
1659        int is_64bit = sizemask & (1 << (i+1)*2);
1660        int is_signed = sizemask & (2 << (i+1)*2);
1661        if (!is_64bit) {
1662            TCGv_i64 temp = tcg_temp_new_i64();
1663            TCGv_i64 orig = temp_tcgv_i64(args[i]);
1664            if (is_signed) {
1665                tcg_gen_ext32s_i64(temp, orig);
1666            } else {
1667                tcg_gen_ext32u_i64(temp, orig);
1668            }
1669            args[i] = tcgv_i64_temp(temp);
1670        }
1671    }
1672#endif /* TCG_TARGET_EXTEND_ARGS */
1673
1674    op = tcg_emit_op(INDEX_op_call);
1675
1676    pi = 0;
1677    if (ret != NULL) {
1678#if defined(__sparc__) && !defined(__arch64__) \
1679    && !defined(CONFIG_TCG_INTERPRETER)
1680        if (orig_sizemask & 1) {
1681            /* The 32-bit ABI is going to return the 64-bit value in
1682               the %o0/%o1 register pair.  Prepare for this by using
1683               two return temporaries, and reassemble below.  */
1684            retl = tcg_temp_new_i64();
1685            reth = tcg_temp_new_i64();
1686            op->args[pi++] = tcgv_i64_arg(reth);
1687            op->args[pi++] = tcgv_i64_arg(retl);
1688            nb_rets = 2;
1689        } else {
1690            op->args[pi++] = temp_arg(ret);
1691            nb_rets = 1;
1692        }
1693#else
1694        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1695#ifdef HOST_WORDS_BIGENDIAN
1696            op->args[pi++] = temp_arg(ret + 1);
1697            op->args[pi++] = temp_arg(ret);
1698#else
1699            op->args[pi++] = temp_arg(ret);
1700            op->args[pi++] = temp_arg(ret + 1);
1701#endif
1702            nb_rets = 2;
1703        } else {
1704            op->args[pi++] = temp_arg(ret);
1705            nb_rets = 1;
1706        }
1707#endif
1708    } else {
1709        nb_rets = 0;
1710    }
1711    TCGOP_CALLO(op) = nb_rets;
1712
1713    real_args = 0;
1714    for (i = 0; i < nargs; i++) {
1715        int is_64bit = sizemask & (1 << (i+1)*2);
1716        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1717#ifdef TCG_TARGET_CALL_ALIGN_ARGS
1718            /* some targets want aligned 64 bit args */
1719            if (real_args & 1) {
1720                op->args[pi++] = TCG_CALL_DUMMY_ARG;
1721                real_args++;
1722            }
1723#endif
1724           /* If stack grows up, then we will be placing successive
1725              arguments at lower addresses, which means we need to
1726              reverse the order compared to how we would normally
1727              treat either big or little-endian.  For those arguments
1728              that will wind up in registers, this still works for
1729              HPPA (the only current STACK_GROWSUP target) since the
1730              argument registers are *also* allocated in decreasing
1731              order.  If another such target is added, this logic may
1732              have to get more complicated to differentiate between
1733              stack arguments and register arguments.  */
1734#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1735            op->args[pi++] = temp_arg(args[i] + 1);
1736            op->args[pi++] = temp_arg(args[i]);
1737#else
1738            op->args[pi++] = temp_arg(args[i]);
1739            op->args[pi++] = temp_arg(args[i] + 1);
1740#endif
1741            real_args += 2;
1742            continue;
1743        }
1744
1745        op->args[pi++] = temp_arg(args[i]);
1746        real_args++;
1747    }
1748    op->args[pi++] = (uintptr_t)func;
1749    op->args[pi++] = flags;
1750    TCGOP_CALLI(op) = real_args;
1751
1752    /* Make sure the fields didn't overflow.  */
1753    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1754    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1755
1756#if defined(__sparc__) && !defined(__arch64__) \
1757    && !defined(CONFIG_TCG_INTERPRETER)
1758    /* Free all of the parts we allocated above.  */
1759    for (i = real_args = 0; i < orig_nargs; ++i) {
1760        int is_64bit = orig_sizemask & (1 << (i+1)*2);
1761        if (is_64bit) {
1762            tcg_temp_free_internal(args[real_args++]);
1763            tcg_temp_free_internal(args[real_args++]);
1764        } else {
1765            real_args++;
1766        }
1767    }
1768    if (orig_sizemask & 1) {
1769        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1770           Note that describing these as TCGv_i64 eliminates an unnecessary
1771           zero-extension that tcg_gen_concat_i32_i64 would create.  */
1772        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1773        tcg_temp_free_i64(retl);
1774        tcg_temp_free_i64(reth);
1775    }
1776#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1777    for (i = 0; i < nargs; ++i) {
1778        int is_64bit = sizemask & (1 << (i+1)*2);
1779        if (!is_64bit) {
1780            tcg_temp_free_internal(args[i]);
1781        }
1782    }
1783#endif /* TCG_TARGET_EXTEND_ARGS */
1784}
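
/* A sketch of the sizemask layout assumed above (it follows
   tcg_gen_sizemask() in tcg.h): bit 0 is set for a 64-bit return
   value, and for argument i, bit (i+1)*2 marks a 64-bit value while
   bit (i+1)*2 + 1 marks a signed one.  For a hypothetical helper
   returning i64 and taking (i32, i64) arguments:

       sizemask = 1                      64-bit return value
                | (0 << 2) | (0 << 3)    arg 0: 32-bit, unsigned
                | (1 << 4) | (0 << 5)    arg 1: 64-bit, unsigned
                = 0x11;

   so the "sizemask & (1 << (i+1)*2)" tests above select exactly the
   64-bit arguments that must be split on a 32-bit host or that need
   no extension on a 64-bit one.  */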
1785
1786static void tcg_reg_alloc_start(TCGContext *s)
1787{
1788    int i, n;
1789    TCGTemp *ts;
1790
1791    for (i = 0, n = s->nb_globals; i < n; i++) {
1792        ts = &s->temps[i];
1793        ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1794    }
1795    for (n = s->nb_temps; i < n; i++) {
1796        ts = &s->temps[i];
1797        ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1798        ts->mem_allocated = 0;
1799        ts->fixed_reg = 0;
1800    }
1801
1802    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1803}
1804
1805static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1806                                 TCGTemp *ts)
1807{
1808    int idx = temp_idx(ts);
1809
1810    if (ts->temp_global) {
1811        pstrcpy(buf, buf_size, ts->name);
1812    } else if (ts->temp_local) {
1813        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1814    } else {
1815        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1816    }
1817    return buf;
1818}
1819
1820static char *tcg_get_arg_str(TCGContext *s, char *buf,
1821                             int buf_size, TCGArg arg)
1822{
1823    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1824}
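
/* Illustrative naming: a global registered as "env" prints under that
   name, while the first non-global temp prints as "tmp0" or, if it is
   a local, "loc0" -- both indices are relative to nb_globals.  */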
1825
1826/* Find helper name.  */
1827static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1828{
1829    const char *ret = NULL;
1830    if (helper_table) {
1831        TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1832        if (info) {
1833            ret = info->name;
1834        }
1835    }
1836    return ret;
1837}
1838
1839static const char * const cond_name[] =
1840{
1841    [TCG_COND_NEVER] = "never",
1842    [TCG_COND_ALWAYS] = "always",
1843    [TCG_COND_EQ] = "eq",
1844    [TCG_COND_NE] = "ne",
1845    [TCG_COND_LT] = "lt",
1846    [TCG_COND_GE] = "ge",
1847    [TCG_COND_LE] = "le",
1848    [TCG_COND_GT] = "gt",
1849    [TCG_COND_LTU] = "ltu",
1850    [TCG_COND_GEU] = "geu",
1851    [TCG_COND_LEU] = "leu",
1852    [TCG_COND_GTU] = "gtu"
1853};
1854
1855static const char * const ldst_name[] =
1856{
1857    [MO_UB]   = "ub",
1858    [MO_SB]   = "sb",
1859    [MO_LEUW] = "leuw",
1860    [MO_LESW] = "lesw",
1861    [MO_LEUL] = "leul",
1862    [MO_LESL] = "lesl",
1863    [MO_LEQ]  = "leq",
1864    [MO_BEUW] = "beuw",
1865    [MO_BESW] = "besw",
1866    [MO_BEUL] = "beul",
1867    [MO_BESL] = "besl",
1868    [MO_BEQ]  = "beq",
1869};
1870
1871static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1872#ifdef ALIGNED_ONLY
1873    [MO_UNALN >> MO_ASHIFT]    = "un+",
1874    [MO_ALIGN >> MO_ASHIFT]    = "",
1875#else
1876    [MO_UNALN >> MO_ASHIFT]    = "",
1877    [MO_ALIGN >> MO_ASHIFT]    = "al+",
1878#endif
1879    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1880    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1881    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1882    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1883    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1884    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1885};
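
/* Sketch of the decoding used by the qemu_ld/st case below: a
   TCGMemOpIdx packs the TCGMemOp in its high bits and the mmu index
   in the low bits (see make_memop_idx() in tcg.h).  For example, a
   little-endian aligned 32-bit load in mmu index 1,

       TCGMemOpIdx oi = make_memop_idx(MO_LEUL | MO_ALIGN, 1);

   decodes as get_memop(oi) == MO_LEUL | MO_ALIGN and
   get_mmuidx(oi) == 1, and is rendered as ",al+leul,1" when
   ALIGNED_ONLY is not defined.  */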
1886
1887void tcg_dump_ops(TCGContext *s)
1888{
1889    char buf[128];
1890    TCGOp *op;
1891
1892    QTAILQ_FOREACH(op, &s->ops, link) {
1893        int i, k, nb_oargs, nb_iargs, nb_cargs;
1894        const TCGOpDef *def;
1895        TCGOpcode c;
1896        int col = 0;
1897
1898        c = op->opc;
1899        def = &tcg_op_defs[c];
1900
1901        if (c == INDEX_op_insn_start) {
1902            col += qemu_log("\n ----");
1903
1904            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1905                target_ulong a;
1906#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1907                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1908#else
1909                a = op->args[i];
1910#endif
1911                col += qemu_log(" " TARGET_FMT_lx, a);
1912            }
1913        } else if (c == INDEX_op_call) {
1914            /* variable number of arguments */
1915            nb_oargs = TCGOP_CALLO(op);
1916            nb_iargs = TCGOP_CALLI(op);
1917            nb_cargs = def->nb_cargs;
1918
1919            /* function name, flags, out args */
1920            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1921                            tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1922                            op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1923            for (i = 0; i < nb_oargs; i++) {
1924                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1925                                                       op->args[i]));
1926            }
1927            for (i = 0; i < nb_iargs; i++) {
1928                TCGArg arg = op->args[nb_oargs + i];
1929                const char *t = "<dummy>";
1930                if (arg != TCG_CALL_DUMMY_ARG) {
1931                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1932                }
1933                col += qemu_log(",%s", t);
1934            }
1935        } else {
1936            col += qemu_log(" %s ", def->name);
1937
1938            nb_oargs = def->nb_oargs;
1939            nb_iargs = def->nb_iargs;
1940            nb_cargs = def->nb_cargs;
1941
1942            if (def->flags & TCG_OPF_VECTOR) {
1943                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1944                                8 << TCGOP_VECE(op));
1945            }
1946
1947            k = 0;
1948            for (i = 0; i < nb_oargs; i++) {
1949                if (k != 0) {
1950                    col += qemu_log(",");
1951                }
1952                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1953                                                      op->args[k++]));
1954            }
1955            for (i = 0; i < nb_iargs; i++) {
1956                if (k != 0) {
1957                    col += qemu_log(",");
1958                }
1959                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1960                                                      op->args[k++]));
1961            }
1962            switch (c) {
1963            case INDEX_op_brcond_i32:
1964            case INDEX_op_setcond_i32:
1965            case INDEX_op_movcond_i32:
1966            case INDEX_op_brcond2_i32:
1967            case INDEX_op_setcond2_i32:
1968            case INDEX_op_brcond_i64:
1969            case INDEX_op_setcond_i64:
1970            case INDEX_op_movcond_i64:
1971            case INDEX_op_cmp_vec:
1972                if (op->args[k] < ARRAY_SIZE(cond_name)
1973                    && cond_name[op->args[k]]) {
1974                    col += qemu_log(",%s", cond_name[op->args[k++]]);
1975                } else {
1976                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1977                }
1978                i = 1;
1979                break;
1980            case INDEX_op_qemu_ld_i32:
1981            case INDEX_op_qemu_st_i32:
1982            case INDEX_op_qemu_ld_i64:
1983            case INDEX_op_qemu_st_i64:
1984                {
1985                    TCGMemOpIdx oi = op->args[k++];
1986                    TCGMemOp op = get_memop(oi);
1987                    unsigned ix = get_mmuidx(oi);
1988
1989                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1990                        col += qemu_log(",$0x%x,%u", op, ix);
1991                    } else {
1992                        const char *s_al, *s_op;
1993                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1994                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1995                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1996                    }
1997                    i = 1;
1998                }
1999                break;
2000            default:
2001                i = 0;
2002                break;
2003            }
2004            switch (c) {
2005            case INDEX_op_set_label:
2006            case INDEX_op_br:
2007            case INDEX_op_brcond_i32:
2008            case INDEX_op_brcond_i64:
2009            case INDEX_op_brcond2_i32:
2010                col += qemu_log("%s$L%d", k ? "," : "",
2011                                arg_label(op->args[k])->id);
2012                i++, k++;
2013                break;
2014            default:
2015                break;
2016            }
2017            for (; i < nb_cargs; i++, k++) {
2018                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2019            }
2020        }
2021        if (op->life) {
2022            unsigned life = op->life;
2023
2024            for (; col < 48; ++col) {
2025                putc(' ', qemu_logfile);
2026            }
2027
2028            if (life & (SYNC_ARG * 3)) {
2029                qemu_log("  sync:");
2030                for (i = 0; i < 2; ++i) {
2031                    if (life & (SYNC_ARG << i)) {
2032                        qemu_log(" %d", i);
2033                    }
2034                }
2035            }
2036            life /= DEAD_ARG;
2037            if (life) {
2038                qemu_log("  dead:");
2039                for (i = 0; life; ++i, life >>= 1) {
2040                    if (life & 1) {
2041                        qemu_log(" %d", i);
2042                    }
2043                }
2044            }
2045        }
2046        qemu_log("\n");
2047    }
2048}
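
/* A representative line of the dump (illustrative), for a 32-bit add
   whose inputs die at this op:

        add_i32 tmp2,tmp0,tmp1                       dead: 1 2

   The annotations past column 48 come from op->life, as computed by
   liveness_pass_1() below.  */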
2049
2050/* we give more priority to constraints with fewer registers */
2051static int get_constraint_priority(const TCGOpDef *def, int k)
2052{
2053    const TCGArgConstraint *arg_ct;
2054
2055    int i, n;
2056    arg_ct = &def->args_ct[k];
2057    if (arg_ct->ct & TCG_CT_ALIAS) {
2058        /* an alias is equivalent to a single register */
2059        n = 1;
2060    } else {
2061        if (!(arg_ct->ct & TCG_CT_REG))
2062            return 0;
2063        n = 0;
2064        for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2065            if (tcg_regset_test_reg(arg_ct->u.regs, i))
2066                n++;
2067        }
2068    }
2069    return TCG_TARGET_NB_REGS - n + 1;
2070}
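
/* Illustration: on a host with 16 registers, a constraint accepting
   any register has n == 16 and priority 16 - 16 + 1 == 1, while one
   restricted to a single register (or an alias, which counts as one
   register) has priority 16 - 1 + 1 == 16.  sort_constraints() below
   therefore places the most constrained arguments first.  */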
2071
2072/* sort from highest priority to lowest */
2073static void sort_constraints(TCGOpDef *def, int start, int n)
2074{
2075    int i, j, p1, p2, tmp;
2076
2077    for(i = 0; i < n; i++)
2078        def->sorted_args[start + i] = start + i;
2079    if (n <= 1)
2080        return;
2081    for(i = 0; i < n - 1; i++) {
2082        for(j = i + 1; j < n; j++) {
2083            p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2084            p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2085            if (p1 < p2) {
2086                tmp = def->sorted_args[start + i];
2087                def->sorted_args[start + i] = def->sorted_args[start + j];
2088                def->sorted_args[start + j] = tmp;
2089            }
2090        }
2091    }
2092}
2093
2094static void process_op_defs(TCGContext *s)
2095{
2096    TCGOpcode op;
2097
2098    for (op = 0; op < NB_OPS; op++) {
2099        TCGOpDef *def = &tcg_op_defs[op];
2100        const TCGTargetOpDef *tdefs;
2101        TCGType type;
2102        int i, nb_args;
2103
2104        if (def->flags & TCG_OPF_NOT_PRESENT) {
2105            continue;
2106        }
2107
2108        nb_args = def->nb_iargs + def->nb_oargs;
2109        if (nb_args == 0) {
2110            continue;
2111        }
2112
2113        tdefs = tcg_target_op_def(op);
2114        /* Missing TCGTargetOpDef entry. */
2115        tcg_debug_assert(tdefs != NULL);
2116
2117        type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2118        for (i = 0; i < nb_args; i++) {
2119            const char *ct_str = tdefs->args_ct_str[i];
2120            /* Incomplete TCGTargetOpDef entry. */
2121            tcg_debug_assert(ct_str != NULL);
2122
2123            def->args_ct[i].u.regs = 0;
2124            def->args_ct[i].ct = 0;
2125            while (*ct_str != '\0') {
2126                switch(*ct_str) {
2127                case '0' ... '9':
2128                    {
2129                        int oarg = *ct_str - '0';
2130                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2131                        tcg_debug_assert(oarg < def->nb_oargs);
2132                        tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2133                        /* TCG_CT_ALIAS is for the output arguments.
2134                           The input is tagged with TCG_CT_IALIAS. */
2135                        def->args_ct[i] = def->args_ct[oarg];
2136                        def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2137                        def->args_ct[oarg].alias_index = i;
2138                        def->args_ct[i].ct |= TCG_CT_IALIAS;
2139                        def->args_ct[i].alias_index = oarg;
2140                    }
2141                    ct_str++;
2142                    break;
2143                case '&':
2144                    def->args_ct[i].ct |= TCG_CT_NEWREG;
2145                    ct_str++;
2146                    break;
2147                case 'i':
2148                    def->args_ct[i].ct |= TCG_CT_CONST;
2149                    ct_str++;
2150                    break;
2151                default:
2152                    ct_str = target_parse_constraint(&def->args_ct[i],
2153                                                     ct_str, type);
2154                    /* Typo in TCGTargetOpDef constraint. */
2155                    tcg_debug_assert(ct_str != NULL);
2156                }
2157            }
2158        }
2159
2160        /* TCGTargetOpDef entry with too much information? */
2161        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2162
2163        /* sort the constraints (XXX: this is just a heuristic) */
2164        sort_constraints(def, 0, def->nb_oargs);
2165        sort_constraints(def, def->nb_oargs, def->nb_iargs);
2166    }
2167}
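
/* As an illustration of the constraint strings parsed above, a
   hypothetical x86-like backend could describe add_i32 with
   { "r", "0", "ri" }: output in any register, first input aliased to
   output 0 (the '0'..'9' case, setting TCG_CT_ALIAS/TCG_CT_IALIAS),
   and second input either a register or an immediate (the 'i' case,
   setting TCG_CT_CONST).  */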
2168
2169void tcg_op_remove(TCGContext *s, TCGOp *op)
2170{
2171    QTAILQ_REMOVE(&s->ops, op, link);
2172    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2173    s->nb_ops--;
2174
2175#ifdef CONFIG_PROFILER
2176    atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2177#endif
2178}
2179
2180static TCGOp *tcg_op_alloc(TCGOpcode opc)
2181{
2182    TCGContext *s = tcg_ctx;
2183    TCGOp *op;
2184
2185    if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2186        op = tcg_malloc(sizeof(TCGOp));
2187    } else {
2188        op = QTAILQ_FIRST(&s->free_ops);
2189        QTAILQ_REMOVE(&s->free_ops, op, link);
2190    }
2191    memset(op, 0, offsetof(TCGOp, link));
2192    op->opc = opc;
2193    s->nb_ops++;
2194
2195    return op;
2196}
2197
2198TCGOp *tcg_emit_op(TCGOpcode opc)
2199{
2200    TCGOp *op = tcg_op_alloc(opc);
2201    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2202    return op;
2203}
2204
2205TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
2206                            TCGOpcode opc, int nargs)
2207{
2208    TCGOp *new_op = tcg_op_alloc(opc);
2209    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2210    return new_op;
2211}
2212
2213TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
2214                           TCGOpcode opc, int nargs)
2215{
2216    TCGOp *new_op = tcg_op_alloc(opc);
2217    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2218    return new_op;
2219}
2220
2221#define TS_DEAD  1
2222#define TS_MEM   2
2223
2224#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2225#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
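
/* liveness_pass_1() below walks the op list backwards, so ts->state
   describes what the code *after* the op being visited still needs:
   TS_DEAD means no later op reads the temp, TS_MEM means the memory
   copy must be valid.  Sketch, visited in the pass's (reverse) order:

       add_i32 t2, t0, t0   <- visited second: t2 becomes TS_DEAD,
                               t0 becomes live (state &= ~TS_DEAD)
       brcond_i32 ...       <- visited first: bb end, all temps dead,
                               globals and locals also get TS_MEM  */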
2226
2227/* liveness analysis: end of function: all temps are dead, and globals
2228   should be in memory. */
2229static void tcg_la_func_end(TCGContext *s)
2230{
2231    int ng = s->nb_globals;
2232    int nt = s->nb_temps;
2233    int i;
2234
2235    for (i = 0; i < ng; ++i) {
2236        s->temps[i].state = TS_DEAD | TS_MEM;
2237    }
2238    for (i = ng; i < nt; ++i) {
2239        s->temps[i].state = TS_DEAD;
2240    }
2241}
2242
2243/* liveness analysis: end of basic block: all temps are dead, globals
2244   and local temps should be in memory. */
2245static void tcg_la_bb_end(TCGContext *s)
2246{
2247    int ng = s->nb_globals;
2248    int nt = s->nb_temps;
2249    int i;
2250
2251    for (i = 0; i < ng; ++i) {
2252        s->temps[i].state = TS_DEAD | TS_MEM;
2253    }
2254    for (i = ng; i < nt; ++i) {
2255        s->temps[i].state = (s->temps[i].temp_local
2256                             ? TS_DEAD | TS_MEM
2257                             : TS_DEAD);
2258    }
2259}
2260
2261/* Liveness analysis: update the opc_arg_life array to tell whether a
2262   given input argument is dead.  Instructions updating dead
2263   temporaries are removed. */
2264static void liveness_pass_1(TCGContext *s)
2265{
2266    int nb_globals = s->nb_globals;
2267    TCGOp *op, *op_prev;
2268
2269    tcg_la_func_end(s);
2270
2271    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, TCGOpHead, link, op_prev) {
2272        int i, nb_iargs, nb_oargs;
2273        TCGOpcode opc_new, opc_new2;
2274        bool have_opc_new2;
2275        TCGLifeData arg_life = 0;
2276        TCGTemp *arg_ts;
2277        TCGOpcode opc = op->opc;
2278        const TCGOpDef *def = &tcg_op_defs[opc];
2279
2280        switch (opc) {
2281        case INDEX_op_call:
2282            {
2283                int call_flags;
2284
2285                nb_oargs = TCGOP_CALLO(op);
2286                nb_iargs = TCGOP_CALLI(op);
2287                call_flags = op->args[nb_oargs + nb_iargs + 1];
2288
2289                /* pure functions can be removed if their result is unused */
2290                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2291                    for (i = 0; i < nb_oargs; i++) {
2292                        arg_ts = arg_temp(op->args[i]);
2293                        if (arg_ts->state != TS_DEAD) {
2294                            goto do_not_remove_call;
2295                        }
2296                    }
2297                    goto do_remove;
2298                } else {
2299                do_not_remove_call:
2300
2301                    /* output args are dead */
2302                    for (i = 0; i < nb_oargs; i++) {
2303                        arg_ts = arg_temp(op->args[i]);
2304                        if (arg_ts->state & TS_DEAD) {
2305                            arg_life |= DEAD_ARG << i;
2306                        }
2307                        if (arg_ts->state & TS_MEM) {
2308                            arg_life |= SYNC_ARG << i;
2309                        }
2310                        arg_ts->state = TS_DEAD;
2311                    }
2312
2313                    if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2314                                        TCG_CALL_NO_READ_GLOBALS))) {
2315                        /* globals should go back to memory */
2316                        for (i = 0; i < nb_globals; i++) {
2317                            s->temps[i].state = TS_DEAD | TS_MEM;
2318                        }
2319                    } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2320                        /* globals should be synced to memory */
2321                        for (i = 0; i < nb_globals; i++) {
2322                            s->temps[i].state |= TS_MEM;
2323                        }
2324                    }
2325
2326                    /* record arguments that die in this helper */
2327                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2328                        arg_ts = arg_temp(op->args[i]);
2329                        if (arg_ts && arg_ts->state & TS_DEAD) {
2330                            arg_life |= DEAD_ARG << i;
2331                        }
2332                    }
2333                    /* input arguments are live for preceding opcodes */
2334                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2335                        arg_ts = arg_temp(op->args[i]);
2336                        if (arg_ts) {
2337                            arg_ts->state &= ~TS_DEAD;
2338                        }
2339                    }
2340                }
2341            }
2342            break;
2343        case INDEX_op_insn_start:
2344            break;
2345        case INDEX_op_discard:
2346            /* mark the temporary as dead */
2347            arg_temp(op->args[0])->state = TS_DEAD;
2348            break;
2349
2350        case INDEX_op_add2_i32:
2351            opc_new = INDEX_op_add_i32;
2352            goto do_addsub2;
2353        case INDEX_op_sub2_i32:
2354            opc_new = INDEX_op_sub_i32;
2355            goto do_addsub2;
2356        case INDEX_op_add2_i64:
2357            opc_new = INDEX_op_add_i64;
2358            goto do_addsub2;
2359        case INDEX_op_sub2_i64:
2360            opc_new = INDEX_op_sub_i64;
2361        do_addsub2:
2362            nb_iargs = 4;
2363            nb_oargs = 2;
2364            /* Test if the high part of the operation is dead, but not
2365               the low part.  The result can be optimized to a simple
2366               add or sub.  This happens often for an x86_64 guest when the
2367               CPU mode is set to 32 bits.  */
2368            if (arg_temp(op->args[1])->state == TS_DEAD) {
2369                if (arg_temp(op->args[0])->state == TS_DEAD) {
2370                    goto do_remove;
2371                }
2372                /* Replace the opcode and adjust the args in place,
2373                   leaving 3 unused args at the end.  */
2374                op->opc = opc = opc_new;
2375                op->args[1] = op->args[2];
2376                op->args[2] = op->args[4];
2377                /* Fall through and mark the single-word operation live.  */
2378                nb_iargs = 2;
2379                nb_oargs = 1;
2380            }
2381            goto do_not_remove;
2382
2383        case INDEX_op_mulu2_i32:
2384            opc_new = INDEX_op_mul_i32;
2385            opc_new2 = INDEX_op_muluh_i32;
2386            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2387            goto do_mul2;
2388        case INDEX_op_muls2_i32:
2389            opc_new = INDEX_op_mul_i32;
2390            opc_new2 = INDEX_op_mulsh_i32;
2391            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2392            goto do_mul2;
2393        case INDEX_op_mulu2_i64:
2394            opc_new = INDEX_op_mul_i64;
2395            opc_new2 = INDEX_op_muluh_i64;
2396            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2397            goto do_mul2;
2398        case INDEX_op_muls2_i64:
2399            opc_new = INDEX_op_mul_i64;
2400            opc_new2 = INDEX_op_mulsh_i64;
2401            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2402            goto do_mul2;
2403        do_mul2:
2404            nb_iargs = 2;
2405            nb_oargs = 2;
2406            if (arg_temp(op->args[1])->state == TS_DEAD) {
2407                if (arg_temp(op->args[0])->state == TS_DEAD) {
2408                    /* Both parts of the operation are dead.  */
2409                    goto do_remove;
2410                }
2411                /* The high part of the operation is dead; generate the low. */
2412                op->opc = opc = opc_new;
2413                op->args[1] = op->args[2];
2414                op->args[2] = op->args[3];
2415            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2416                /* The low part of the operation is dead; generate the high. */
2417                op->opc = opc = opc_new2;
2418                op->args[0] = op->args[1];
2419                op->args[1] = op->args[2];
2420                op->args[2] = op->args[3];
2421            } else {
2422                goto do_not_remove;
2423            }
2424            /* Mark the single-word operation live.  */
2425            nb_oargs = 1;
2426            goto do_not_remove;
2427
2428        default:
2429            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2430            nb_iargs = def->nb_iargs;
2431            nb_oargs = def->nb_oargs;
2432
2433            /* Test if the operation can be removed because all
2434               its outputs are dead. We assume that nb_oargs == 0
2435               implies side effects.  */
2436            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2437                for (i = 0; i < nb_oargs; i++) {
2438                    if (arg_temp(op->args[i])->state != TS_DEAD) {
2439                        goto do_not_remove;
2440                    }
2441                }
2442            do_remove:
2443                tcg_op_remove(s, op);
2444            } else {
2445            do_not_remove:
2446                /* output args are dead */
2447                for (i = 0; i < nb_oargs; i++) {
2448                    arg_ts = arg_temp(op->args[i]);
2449                    if (arg_ts->state & TS_DEAD) {
2450                        arg_life |= DEAD_ARG << i;
2451                    }
2452                    if (arg_ts->state & TS_MEM) {
2453                        arg_life |= SYNC_ARG << i;
2454                    }
2455                    arg_ts->state = TS_DEAD;
2456                }
2457
2458                /* if end of basic block, update */
2459                if (def->flags & TCG_OPF_BB_END) {
2460                    tcg_la_bb_end(s);
2461                } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2462                    /* globals should be synced to memory */
2463                    for (i = 0; i < nb_globals; i++) {
2464                        s->temps[i].state |= TS_MEM;
2465                    }
2466                }
2467
2468                /* record arguments that die in this opcode */
2469                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2470                    arg_ts = arg_temp(op->args[i]);
2471                    if (arg_ts->state & TS_DEAD) {
2472                        arg_life |= DEAD_ARG << i;
2473                    }
2474                }
2475                /* input arguments are live for preceding opcodes */
2476                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2477                    arg_temp(op->args[i])->state &= ~TS_DEAD;
2478                }
2479            }
2480            break;
2481        }
2482        op->life = arg_life;
2483    }
2484}
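
/* Worked example of the do_addsub2 rewrite above: for a 64-bit guest
   add decomposed on a 32-bit host,

       add2_i32 lo,hi,al,ah,bl,bh        with hi (args[1]) dead
    -> add_i32  lo,al,bl

   args[1] and args[2] are rewritten in place from (hi, al) to
   (al, bl) and the op is then treated as a one-output, two-input
   add for the liveness bookkeeping that follows.  */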
2485
2486/* Liveness analysis: Convert indirect regs to direct temporaries.  */
2487static bool liveness_pass_2(TCGContext *s)
2488{
2489    int nb_globals = s->nb_globals;
2490    int nb_temps, i;
2491    bool changes = false;
2492    TCGOp *op, *op_next;
2493
2494    /* Create a temporary for each indirect global.  */
2495    for (i = 0; i < nb_globals; ++i) {
2496        TCGTemp *its = &s->temps[i];
2497        if (its->indirect_reg) {
2498            TCGTemp *dts = tcg_temp_alloc(s);
2499            dts->type = its->type;
2500            dts->base_type = its->base_type;
2501            its->state_ptr = dts;
2502        } else {
2503            its->state_ptr = NULL;
2504        }
2505        /* All globals begin dead.  */
2506        its->state = TS_DEAD;
2507    }
2508    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2509        TCGTemp *its = &s->temps[i];
2510        its->state_ptr = NULL;
2511        its->state = TS_DEAD;
2512    }
2513
2514    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2515        TCGOpcode opc = op->opc;
2516        const TCGOpDef *def = &tcg_op_defs[opc];
2517        TCGLifeData arg_life = op->life;
2518        int nb_iargs, nb_oargs, call_flags;
2519        TCGTemp *arg_ts, *dir_ts;
2520
2521        if (opc == INDEX_op_call) {
2522            nb_oargs = TCGOP_CALLO(op);
2523            nb_iargs = TCGOP_CALLI(op);
2524            call_flags = op->args[nb_oargs + nb_iargs + 1];
2525        } else {
2526            nb_iargs = def->nb_iargs;
2527            nb_oargs = def->nb_oargs;
2528
2529            /* Set flags similar to those that calls require.  */
2530            if (def->flags & TCG_OPF_BB_END) {
2531                /* Like writing globals: save_globals */
2532                call_flags = 0;
2533            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2534                /* Like reading globals: sync_globals */
2535                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2536            } else {
2537                /* No effect on globals.  */
2538                call_flags = (TCG_CALL_NO_READ_GLOBALS |
2539                              TCG_CALL_NO_WRITE_GLOBALS);
2540            }
2541        }
2542
2543        /* Make sure that input arguments are available.  */
2544        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2545            arg_ts = arg_temp(op->args[i]);
2546            if (arg_ts) {
2547                dir_ts = arg_ts->state_ptr;
2548                if (dir_ts && arg_ts->state == TS_DEAD) {
2549                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2550                                      ? INDEX_op_ld_i32
2551                                      : INDEX_op_ld_i64);
2552                    TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
2553
2554                    lop->args[0] = temp_arg(dir_ts);
2555                    lop->args[1] = temp_arg(arg_ts->mem_base);
2556                    lop->args[2] = arg_ts->mem_offset;
2557
2558                    /* Loaded, but synced with memory.  */
2559                    arg_ts->state = TS_MEM;
2560                }
2561            }
2562        }
2563
2564        /* Perform input replacement, and mark inputs that became dead.
2565           No action is required except keeping temp_state up to date
2566           so that we reload when needed.  */
2567        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2568            arg_ts = arg_temp(op->args[i]);
2569            if (arg_ts) {
2570                dir_ts = arg_ts->state_ptr;
2571                if (dir_ts) {
2572                    op->args[i] = temp_arg(dir_ts);
2573                    changes = true;
2574                    if (IS_DEAD_ARG(i)) {
2575                        arg_ts->state = TS_DEAD;
2576                    }
2577                }
2578            }
2579        }
2580
2581        /* Liveness analysis should ensure that the following are
2582           all correct, for call sites and basic block end points.  */
2583        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2584            /* Nothing to do */
2585        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2586            for (i = 0; i < nb_globals; ++i) {
2587                /* Liveness should see that globals are synced back,
2588                   that is, either TS_DEAD or TS_MEM.  */
2589                arg_ts = &s->temps[i];
2590                tcg_debug_assert(arg_ts->state_ptr == 0
2591                                 || arg_ts->state != 0);
2592            }
2593        } else {
2594            for (i = 0; i < nb_globals; ++i) {
2595                /* Liveness should see that globals are saved back,
2596                   that is, TS_DEAD, waiting to be reloaded.  */
2597                arg_ts = &s->temps[i];
2598                tcg_debug_assert(arg_ts->state_ptr == 0
2599                                 || arg_ts->state == TS_DEAD);
2600            }
2601        }
2602
2603        /* Outputs become available.  */
2604        for (i = 0; i < nb_oargs; i++) {
2605            arg_ts = arg_temp(op->args[i]);
2606            dir_ts = arg_ts->state_ptr;
2607            if (!dir_ts) {
2608                continue;
2609            }
2610            op->args[i] = temp_arg(dir_ts);
2611            changes = true;
2612
2613            /* The output is now live and modified.  */
2614            arg_ts->state = 0;
2615
2616            /* Sync outputs upon their last write.  */
2617            if (NEED_SYNC_ARG(i)) {
2618                TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2619                                  ? INDEX_op_st_i32
2620                                  : INDEX_op_st_i64);
2621                TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
2622
2623                sop->args[0] = temp_arg(dir_ts);
2624                sop->args[1] = temp_arg(arg_ts->mem_base);
2625                sop->args[2] = arg_ts->mem_offset;
2626
2627                arg_ts->state = TS_MEM;
2628            }
2629            /* Drop outputs that are dead.  */
2630            if (IS_DEAD_ARG(i)) {
2631                arg_ts->state = TS_DEAD;
2632            }
2633        }
2634    }
2635
2636    return changes;
2637}
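
/* Sketch of what liveness_pass_2() does for an indirect global G,
   i.e. one reached through a pointer rather than kept in the fixed
   env frame (g_dir below is hypothetical notation for the direct
   temp created for it above):

       ld_i32  g_dir, base, off    <- inserted before the first read
       add_i32 t0, g_dir, t1       <- G's argument replaced by g_dir

   The inserted load leaves G in TS_MEM, so later reads do not reload
   it, and a write whose value must survive gets a matching st_i32
   inserted after it.  */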
2638
2639#ifdef CONFIG_DEBUG_TCG
2640static void dump_regs(TCGContext *s)
2641{
2642    TCGTemp *ts;
2643    int i;
2644    char buf[64];
2645
2646    for(i = 0; i < s->nb_temps; i++) {
2647        ts = &s->temps[i];
2648        printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2649        switch(ts->val_type) {
2650        case TEMP_VAL_REG:
2651            printf("%s", tcg_target_reg_names[ts->reg]);
2652            break;
2653        case TEMP_VAL_MEM:
2654            printf("%d(%s)", (int)ts->mem_offset,
2655                   tcg_target_reg_names[ts->mem_base->reg]);
2656            break;
2657        case TEMP_VAL_CONST:
2658            printf("$0x%" TCG_PRIlx, ts->val);
2659            break;
2660        case TEMP_VAL_DEAD:
2661            printf("D");
2662            break;
2663        default:
2664            printf("???");
2665            break;
2666        }
2667        printf("\n");
2668    }
2669
2670    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2671        if (s->reg_to_temp[i] != NULL) {
2672            printf("%s: %s\n", 
2673                   tcg_target_reg_names[i], 
2674                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2675        }
2676    }
2677}
2678
2679static void check_regs(TCGContext *s)
2680{
2681    int reg;
2682    int k;
2683    TCGTemp *ts;
2684    char buf[64];
2685
2686    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2687        ts = s->reg_to_temp[reg];
2688        if (ts != NULL) {
2689            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2690                printf("Inconsistency for register %s:\n", 
2691                       tcg_target_reg_names[reg]);
2692                goto fail;
2693            }
2694        }
2695    }
2696    for (k = 0; k < s->nb_temps; k++) {
2697        ts = &s->temps[k];
2698        if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
2699            && s->reg_to_temp[ts->reg] != ts) {
2700            printf("Inconsistency for temp %s:\n",
2701                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2702        fail:
2703            printf("reg state:\n");
2704            dump_regs(s);
2705            tcg_abort();
2706        }
2707    }
2708}
2709#endif
2710
2711static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
2712{
2713#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
2714    /* The sparc64 stack is accessed with an offset of 2047 */
2715    s->current_frame_offset = (s->current_frame_offset +
2716                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
2717        ~(sizeof(tcg_target_long) - 1);
2718#endif
2719    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
2720        s->frame_end) {
2721        tcg_abort();
2722    }
2723    ts->mem_offset = s->current_frame_offset;
2724    ts->mem_base = s->frame_temp;
2725    ts->mem_allocated = 1;
2726    s->current_frame_offset += sizeof(tcg_target_long);
2727}
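
/* The rounding above is the usual align-up idiom.  E.g. with an
   8-byte tcg_target_long and current_frame_offset == 12:

       (12 + 8 - 1) & ~(8 - 1) == 19 & ~7 == 16

   so every spill slot starts on a sizeof(tcg_target_long) boundary.
   sparc64 skips the rounding because its stack offsets carry the
   2047 bias noted above.  */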
2728
2729static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);
2730
2731/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
2732   mark it free; otherwise mark it dead.  */
2733static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
2734{
2735    if (ts->fixed_reg) {
2736        return;
2737    }
2738    if (ts->val_type == TEMP_VAL_REG) {
2739        s->reg_to_temp[ts->reg] = NULL;
2740    }
2741    ts->val_type = (free_or_dead < 0
2742                    || ts->temp_local
2743                    || ts->temp_global
2744                    ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
2745}
2746
2747/* Mark a temporary as dead.  */
2748static inline void temp_dead(TCGContext *s, TCGTemp *ts)
2749{
2750    temp_free_or_dead(s, ts, 1);
2751}
2752
2753/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
2754   register needs to be allocated to store a constant.  If 'free_or_dead'
2755   is non-zero, subsequently release the temporary; if it is positive, the
2756   temp is dead; if it is negative, the temp is free.  */
2757static void temp_sync(TCGContext *s, TCGTemp *ts,
2758                      TCGRegSet allocated_regs, int free_or_dead)
2759{
2760    if (ts->fixed_reg) {
2761        return;
2762    }
2763    if (!ts->mem_coherent) {
2764        if (!ts->mem_allocated) {
2765            temp_allocate_frame(s, ts);
2766        }
2767        switch (ts->val_type) {
2768        case TEMP_VAL_CONST:
2769            /* If we're going to free the temp immediately, then we won't
2770               require it later in a register, so attempt to store the
2771               constant to memory directly.  */
2772            if (free_or_dead
2773                && tcg_out_sti(s, ts->type, ts->val,
2774                               ts->mem_base->reg, ts->mem_offset)) {
2775                break;
2776            }
2777            temp_load(s, ts, tcg_target_available_regs[ts->type],
2778                      allocated_regs);
2779            /* fallthrough */
2780
2781        case TEMP_VAL_REG:
2782            tcg_out_st(s, ts->type, ts->reg,
2783                       ts->mem_base->reg, ts->mem_offset);
2784            break;
2785
2786        case TEMP_VAL_MEM:
2787            break;
2788
2789        case TEMP_VAL_DEAD:
2790        default:
2791            tcg_abort();
2792        }
2793        ts->mem_coherent = 1;
2794    }
2795    if (free_or_dead) {
2796        temp_free_or_dead(s, ts, free_or_dead);
2797    }
2798}
2799
2800/* free register 'reg' by spilling the corresponding temporary if necessary */
2801static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
2802{
2803    TCGTemp *ts = s->reg_to_temp[reg];
2804    if (ts != NULL) {
2805        temp_sync(s, ts, allocated_regs, -1);
2806    }
2807}
2808
2809/* Allocate a register belonging to desired_regs & ~allocated_regs */
2810static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
2811                            TCGRegSet allocated_regs, bool rev)
2812{
2813    int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
2814    const int *order;
2815    TCGReg reg;
2816    TCGRegSet reg_ct;
2817
2818    reg_ct = desired_regs & ~allocated_regs;
2819    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
2820
2821    /* first try free registers */
2822    for(i = 0; i < n; i++) {
2823        reg = order[i];
2824        if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
2825            return reg;
2826    }
2827
2828    /* XXX: do better spill choice */
2829    for(i = 0; i < n; i++) {
2830        reg = order[i];
2831        if (tcg_regset_test_reg(reg_ct, reg)) {
2832            tcg_reg_free(s, reg, allocated_regs);
2833            return reg;
2834        }
2835    }
2836
2837    tcg_abort();
2838}
2839
2840/* Make sure the temporary is in a register.  If needed, allocate the register
2841   from DESIRED while avoiding ALLOCATED.  */
2842static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
2843                      TCGRegSet allocated_regs)
2844{
2845    TCGReg reg;
2846
2847    switch (ts->val_type) {
2848    case TEMP_VAL_REG:
2849        return;
2850    case TEMP_VAL_CONST:
2851        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2852        tcg_out_movi(s, ts->type, reg, ts->val);
2853        ts->mem_coherent = 0;
2854        break;
2855    case TEMP_VAL_MEM:
2856        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2857        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
2858        ts->mem_coherent = 1;
2859        break;
2860    case TEMP_VAL_DEAD:
2861    default:
2862        tcg_abort();
2863    }
2864    ts->reg = reg;
2865    ts->val_type = TEMP_VAL_REG;
2866    s->reg_to_temp[reg] = ts;
2867}
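
/* Taken together, temp_load(), temp_sync() and temp_free_or_dead()
   form a small state machine over val_type.  Sketch for a plain temp
   t holding a constant (assuming no register pressure):

       t = const 5           TEMP_VAL_CONST, no code emitted yet
       temp_load(t, ...)     emits movi reg,5     -> TEMP_VAL_REG
       temp_sync(t, ..., 0)  emits st reg,[slot]  -> mem_coherent = 1
       temp_dead(t)          releases reg         -> TEMP_VAL_DEAD  */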
2868
2869/* Save a temporary to memory. 'allocated_regs' is used in case a
2870   temporary register needs to be allocated to store a constant.  */
2871static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
2872{
2873    /* The liveness analysis already ensures that globals are back
2874       in memory. Keep a tcg_debug_assert for safety. */
2875    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
2876}
2877
2878/* save globals to their canonical location and assume they can be
2879   modified by the following code. 'allocated_regs' is used in case a
2880   temporary register needs to be allocated to store a constant. */
2881static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
2882{
2883    int i, n;
2884
2885    for (i = 0, n = s->nb_globals; i < n; i++) {
2886        temp_save(s, &s->temps[i], allocated_regs);
2887    }
2888}
2889
2890/* sync globals to their canonical location and assume they can be
2891   read by the following code. 'allocated_regs' is used in case a
2892   temporary register needs to be allocated to store a constant. */
2893static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
2894{
2895    int i, n;
2896
2897    for (i = 0, n = s->nb_globals; i < n; i++) {
2898        TCGTemp *ts = &s->temps[i];
2899        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
2900                         || ts->fixed_reg
2901                         || ts->mem_coherent);
2902    }
2903}
2904
2905/* at the end of a basic block, we assume all temporaries are dead and
2906   all globals are stored at their canonical location. */
2907static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
2908{
2909    int i;
2910
2911    for (i = s->nb_globals; i < s->nb_temps; i++) {
2912        TCGTemp *ts = &s->temps[i];
2913        if (ts->temp_local) {
2914            temp_save(s, ts, allocated_regs);
2915        } else {
2916            /* The liveness analysis already ensures that temps are dead.
2917               Keep a tcg_debug_assert for safety. */
2918            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
2919        }
2920    }
2921
2922    save_globals(s, allocated_regs);
2923}
2924
2925static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
2926                                  tcg_target_ulong val, TCGLifeData arg_life)
2927{
2928    if (ots->fixed_reg) {
2929        /* For fixed registers, we do not do any constant propagation.  */
2930        tcg_out_movi(s, ots->type, ots->reg, val);
2931        return;
2932    }
2933
2934    /* The movi is not explicitly generated here.  */
2935    if (ots->val_type == TEMP_VAL_REG) {
2936        s->reg_to_temp[ots->reg] = NULL;
2937    }
2938    ots->val_type = TEMP_VAL_CONST;
2939    ots->val = val;
2940    ots->mem_coherent = 0;
2941    if (NEED_SYNC_ARG(0)) {
2942        temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0));
2943    } else if (IS_DEAD_ARG(0)) {
2944        temp_dead(s, ots);
2945    }
2946}
2947
2948static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
2949{
2950    TCGTemp *ots = arg_temp(op->args[0]);
2951    tcg_target_ulong val = op->args[1];
2952
2953    tcg_reg_alloc_do_movi(s, ots, val, op->life);
2954}
2955
2956static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
2957{
2958    const TCGLifeData arg_life = op->life;
2959    TCGRegSet allocated_regs;
2960    TCGTemp *ts, *ots;
2961    TCGType otype, itype;
2962
2963    allocated_regs = s->reserved_regs;
2964    ots = arg_temp(op->args[0]);
2965    ts = arg_temp(op->args[1]);
2966
2967    /* Note that otype != itype for no-op truncation.  */
2968    otype = ots->type;
2969    itype = ts->type;
2970
2971    if (ts->val_type == TEMP_VAL_CONST) {
2972        /* propagate constant or generate sti */
2973        tcg_target_ulong val = ts->val;
2974        if (IS_DEAD_ARG(1)) {
2975            temp_dead(s, ts);
2976        }
2977        tcg_reg_alloc_do_movi(s, ots, val, arg_life);
2978        return;
2979    }
2980
2981    /* If the source value is in memory we're going to be forced
2982       to have it in a register in order to perform the copy.  Copy
2983       the SOURCE value into its own register first, that way we
2984       don't have to reload SOURCE the next time it is used. */
2985    if (ts->val_type == TEMP_VAL_MEM) {
2986        temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
2987    }
2988
2989    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
2990    if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
2991        /* mov to a non-saved dead register makes no sense (even with
2992           liveness analysis disabled). */
2993        tcg_debug_assert(NEED_SYNC_ARG(0));
2994        if (!ots->mem_allocated) {
2995            temp_allocate_frame(s, ots);
2996        }
2997        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
2998        if (IS_DEAD_ARG(1)) {
2999            temp_dead(s, ts);
3000        }
3001        temp_dead(s, ots);
3002    } else {
3003        if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
3004            /* the mov can be suppressed */
3005            if (ots->val_type == TEMP_VAL_REG) {
3006                s->reg_to_temp[ots->reg] = NULL;
3007            }
3008            ots->reg = ts->reg;
3009            temp_dead(s, ts);
3010        } else {
3011            if (ots->val_type != TEMP_VAL_REG) {
3012                /* When allocating a new register, make sure not to spill the
3013                   input one. */
3014                tcg_regset_set_reg(allocated_regs, ts->reg);
3015                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3016                                         allocated_regs, ots->indirect_base);
3017            }
3018            tcg_out_mov(s, otype, ots->reg, ts->reg);
3019        }
3020        ots->val_type = TEMP_VAL_REG;
3021        ots->mem_coherent = 0;
3022        s->reg_to_temp[ots->reg] = ots;
3023        if (NEED_SYNC_ARG(0)) {
3024            temp_sync(s, ots, allocated_regs, 0);
3025        }
3026    }
3027}
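
/* E.g. for "mov_i32 t1, t0" where t0 dies here and neither temp is a
   fixed register, no host instruction is emitted at all: t1 simply
   takes over t0's register in reg_to_temp[] (the "mov can be
   suppressed" path above).  */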
3028
3029static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3030{
3031    const TCGLifeData arg_life = op->life;
3032    const TCGOpDef * const def = &tcg_op_defs[op->opc];
3033    TCGRegSet i_allocated_regs;
3034    TCGRegSet o_allocated_regs;
3035    int i, k, nb_iargs, nb_oargs;
3036    TCGReg reg;
3037    TCGArg arg;
3038    const TCGArgConstraint *arg_ct;
3039    TCGTemp *ts;
3040    TCGArg new_args[TCG_MAX_OP_ARGS];
3041    int const_args[TCG_MAX_OP_ARGS];
3042
3043    nb_oargs = def->nb_oargs;
3044    nb_iargs = def->nb_iargs;
3045
3046    /* copy constants */
3047    memcpy(new_args + nb_oargs + nb_iargs, 
3048           op->args + nb_oargs + nb_iargs,
3049           sizeof(TCGArg) * def->nb_cargs);
3050
3051    i_allocated_regs = s->reserved_regs;
3052    o_allocated_regs = s->reserved_regs;
3053
3054    /* satisfy input constraints */ 
3055    for (k = 0; k < nb_iargs; k++) {
3056        i = def->sorted_args[nb_oargs + k];
3057        arg = op->args[i];
3058        arg_ct = &def->args_ct[i];
3059        ts = arg_temp(arg);
3060
3061        if (ts->val_type == TEMP_VAL_CONST
3062            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3063            /* constant is OK for instruction */
3064            const_args[i] = 1;
3065            new_args[i] = ts->val;
3066            goto iarg_end;
3067        }
3068
3069        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);
3070
3071        if (arg_ct->ct & TCG_CT_IALIAS) {
3072            if (ts->fixed_reg) {
3073                /* if fixed register, we must allocate a new register
3074                   if the alias is not the same register */
3075                if (arg != op->args[arg_ct->alias_index])
3076                    goto allocate_in_reg;
3077            } else {
3078                /* if the input is aliased to an output and if it is
3079                   not dead after the instruction, we must allocate
3080                   a new register and move it */
3081                if (!IS_DEAD_ARG(i)) {
3082                    goto allocate_in_reg;
3083                }
3084                /* check if the current register has already been allocated
3085                   for another input aliased to an output */
3086                int k2, i2;
3087                for (k2 = 0 ; k2 < k ; k2++) {
3088                    i2 = def->sorted_args[nb_oargs + k2];
3089                    if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3090                        (new_args[i2] == ts->reg)) {
3091                        goto allocate_in_reg;
3092                    }
3093                }
3094            }
3095        }
3096        reg = ts->reg;
3097        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3098            /* nothing to do : the constraint is satisfied */
3099        } else {
3100        allocate_in_reg:
3101            /* allocate a new register matching the constraint 
3102               and move the temporary register into it */
3103            reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3104                                ts->indirect_base);
3105            tcg_out_mov(s, ts->type, reg, ts->reg);
3106        }
3107        new_args[i] = reg;
3108        const_args[i] = 0;
3109        tcg_regset_set_reg(i_allocated_regs, reg);
3110    iarg_end: ;
3111    }
3112    
3113    /* mark dead temporaries and free the associated registers */
3114    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3115        if (IS_DEAD_ARG(i)) {
3116            temp_dead(s, arg_temp(op->args[i]));
3117        }
3118    }
3119
3120    if (def->flags & TCG_OPF_BB_END) {
3121        tcg_reg_alloc_bb_end(s, i_allocated_regs);
3122    } else {
3123        if (def->flags & TCG_OPF_CALL_CLOBBER) {
3124            /* XXX: permit generic clobber register list ? */ 
3125            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3126                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3127                    tcg_reg_free(s, i, i_allocated_regs);
3128                }
3129            }
3130        }
3131        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3132            /* sync globals if the op has side effects and might trigger
3133               an exception. */
3134            sync_globals(s, i_allocated_regs);
3135        }
3136        
3137        /* satisfy the output constraints */
3138        for(k = 0; k < nb_oargs; k++) {
3139            i = def->sorted_args[k];
3140            arg = op->args[i];
3141            arg_ct = &def->args_ct[i];
3142            ts = arg_temp(arg);
3143            if ((arg_ct->ct & TCG_CT_ALIAS)
3144                && !const_args[arg_ct->alias_index]) {
3145                reg = new_args[arg_ct->alias_index];
3146            } else if (arg_ct->ct & TCG_CT_NEWREG) {
3147                reg = tcg_reg_alloc(s, arg_ct->u.regs,
3148                                    i_allocated_regs | o_allocated_regs,
3149                                    ts->indirect_base);
3150            } else {
3151                /* if fixed register, we try to use it */
3152                reg = ts->reg;
3153                if (ts->fixed_reg &&
3154                    tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3155                    goto oarg_end;
3156                }
3157                reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3158                                    ts->indirect_base);
3159            }
3160            tcg_regset_set_reg(o_allocated_regs, reg);
3161            /* if a fixed register is used, then a move will be done afterwards */
3162            if (!ts->fixed_reg) {
3163                if (ts->val_type == TEMP_VAL_REG) {
3164                    s->reg_to_temp[ts->reg] = NULL;
3165                }
3166                ts->val_type = TEMP_VAL_REG;
3167                ts->reg = reg;
3168                /* temp value is modified, so the value kept in memory is
3169                   potentially not the same */
3170                ts->mem_coherent = 0;
3171                s->reg_to_temp[reg] = ts;
3172            }
3173        oarg_end:
3174            new_args[i] = reg;
3175        }
3176    }
3177
3178    /* emit instruction */
3179    if (def->flags & TCG_OPF_VECTOR) {
3180        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3181                       new_args, const_args);
3182    } else {
3183        tcg_out_op(s, op->opc, new_args, const_args);
3184    }
3185
3186    /* move the outputs into the correct registers if needed */
3187    for(i = 0; i < nb_oargs; i++) {
3188        ts = arg_temp(op->args[i]);
3189        reg = new_args[i];
3190        if (ts->fixed_reg && ts->reg != reg) {
3191            tcg_out_mov(s, ts->type, ts->reg, reg);
3192        }
3193        if (NEED_SYNC_ARG(i)) {
3194            temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
3195        } else if (IS_DEAD_ARG(i)) {
3196            temp_dead(s, ts);
3197        }
3198    }
3199}
3200
3201#ifdef TCG_TARGET_STACK_GROWSUP
3202#define STACK_DIR(x) (-(x))
3203#else
3204#define STACK_DIR(x) (x)
3205#endif
3206
3207static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3208{
3209    const int nb_oargs = TCGOP_CALLO(op);
3210    const int nb_iargs = TCGOP_CALLI(op);
3211    const TCGLifeData arg_life = op->life;
3212    int flags, nb_regs, i;
3213    TCGReg reg;
3214    TCGArg arg;
3215    TCGTemp *ts;
3216    intptr_t stack_offset;
3217    size_t call_stack_size;
3218    tcg_insn_unit *func_addr;
3219    int allocate_args;
3220    TCGRegSet allocated_regs;
3221
3222    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3223    flags = op->args[nb_oargs + nb_iargs + 1];
3224
3225    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3226    if (nb_regs > nb_iargs) {
3227        nb_regs = nb_iargs;
3228    }
3229
3230    /* assign stack slots first */
3231    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3232    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 
3233        ~(TCG_TARGET_STACK_ALIGN - 1);
3234    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3235    if (allocate_args) {
3236        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3237           preallocate call stack */
3238        tcg_abort();
3239    }
3240
3241    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3242    for (i = nb_regs; i < nb_iargs; i++) {
3243        arg = op->args[nb_oargs + i];
3244#ifdef TCG_TARGET_STACK_GROWSUP
3245        stack_offset -= sizeof(tcg_target_long);
3246#endif
3247        if (arg != TCG_CALL_DUMMY_ARG) {
3248            ts = arg_temp(arg);
3249            temp_load(s, ts, tcg_target_available_regs[ts->type],
3250                      s->reserved_regs);
3251            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3252        }
3253#ifndef TCG_TARGET_STACK_GROWSUP
3254        stack_offset += sizeof(tcg_target_long);
3255#endif
3256    }
3257
3258    /* assign input registers */
3259    allocated_regs = s->reserved_regs;
3260    for (i = 0; i < nb_regs; i++) {
3261        arg = op->args[nb_oargs + i];
3262        if (arg != TCG_CALL_DUMMY_ARG) {
3263            ts = arg_temp(arg);
3264            reg = tcg_target_call_iarg_regs[i];
3265            tcg_reg_free(s, reg, allocated_regs);
3266
3267            if (ts->val_type == TEMP_VAL_REG) {
3268                if (ts->reg != reg) {
3269                    tcg_out_mov(s, ts->type, reg, ts->reg);
3270                }
3271            } else {
3272                TCGRegSet arg_set = 0;
3273
3274                tcg_regset_set_reg(arg_set, reg);
3275                temp_load(s, ts, arg_set, allocated_regs);
3276            }
3277
3278            tcg_regset_set_reg(allocated_regs, reg);
3279        }
3280    }
3281
3282    /* mark dead temporaries and free the associated registers */
3283    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3284        if (IS_DEAD_ARG(i)) {
3285            temp_dead(s, arg_temp(op->args[i]));
3286        }
3287    }
3288
3289    /* clobber call registers */
3290    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3291        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3292            tcg_reg_free(s, i, allocated_regs);
3293        }
3294    }
3295
3296    /* Save globals if they might be written by the helper, sync them if
3297       they might be read. */
3298    if (flags & TCG_CALL_NO_READ_GLOBALS) {
3299        /* Nothing to do */
3300    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3301        sync_globals(s, allocated_regs);
3302    } else {
3303        save_globals(s, allocated_regs);
3304    }
3305
3306    tcg_out_call(s, func_addr);
3307
3308    /* assign output registers and emit moves if needed */
3309    for (i = 0; i < nb_oargs; i++) {
3310        arg = op->args[i];
3311        ts = arg_temp(arg);
3312        reg = tcg_target_call_oarg_regs[i];
3313        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3314
3315        if (ts->fixed_reg) {
3316            if (ts->reg != reg) {
3317                tcg_out_mov(s, ts->type, ts->reg, reg);
3318            }
3319        } else {
3320            if (ts->val_type == TEMP_VAL_REG) {
3321                s->reg_to_temp[ts->reg] = NULL;
3322            }
3323            ts->val_type = TEMP_VAL_REG;
3324            ts->reg = reg;
3325            ts->mem_coherent = 0;
3326            s->reg_to_temp[reg] = ts;
3327            if (NEED_SYNC_ARG(i)) {
3328                temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
3329            } else if (IS_DEAD_ARG(i)) {
3330                temp_dead(s, ts);
3331            }
3332        }
3333    }
3334}
3335
3336#ifdef CONFIG_PROFILER
3337
3338/* Macros avoid copy/paste errors across the many fields merged below */
3339#define PROF_ADD(to, from, field)                       \
3340    do {                                                \
3341        (to)->field += atomic_read(&((from)->field));   \
3342    } while (0)
3343
3344#define PROF_MAX(to, from, field)                                       \
3345    do {                                                                \
3346        typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3347        if (val__ > (to)->field) {                                      \
3348            (to)->field = val__;                                        \
3349        }                                                               \
3350    } while (0)
3351
3352/* Pass in a zeroed @prof */
3353static inline
3354void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3355{
3356    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3357    unsigned int i;
3358
3359    for (i = 0; i < n_ctxs; i++) {
3360        TCGContext *s = atomic_read(&tcg_ctxs[i]);
3361        const TCGProfile *orig = &s->prof;
3362
3363        if (counters) {
3364            PROF_ADD(prof, orig, tb_count1);
3365            PROF_ADD(prof, orig, tb_count);
3366            PROF_ADD(prof, orig, op_count);
3367            PROF_MAX(prof, orig, op_count_max);
3368            PROF_ADD(prof, orig, temp_count);
3369            PROF_MAX(prof, orig, temp_count_max);
3370            PROF_ADD(prof, orig, del_op_count);
3371            PROF_ADD(prof, orig, code_in_len);
3372            PROF_ADD(prof, orig, code_out_len);
3373            PROF_ADD(prof, orig, search_out_len);
3374            PROF_ADD(prof, orig, interm_time);
3375            PROF_ADD(prof, orig, code_time);
3376            PROF_ADD(prof, orig, la_time);
3377            PROF_ADD(prof, orig, opt_time);
3378            PROF_ADD(prof, orig, restore_count);
3379            PROF_ADD(prof, orig, restore_time);
3380        }
3381        if (table) {
3382            int i;
3383
3384            for (i = 0; i < NB_OPS; i++) {
3385                PROF_ADD(prof, orig, table_op_count[i]);
3386            }
3387        }
3388    }
3389}
3390
3391#undef PROF_ADD
3392#undef PROF_MAX
3393
3394static void tcg_profile_snapshot_counters(TCGProfile *prof)
3395{
3396    tcg_profile_snapshot(prof, true, false);
3397}
3398
3399static void tcg_profile_snapshot_table(TCGProfile *prof)
3400{
3401    tcg_profile_snapshot(prof, false, true);
3402}
3403
3404void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3405{
3406    TCGProfile prof = {};
3407    int i;
3408
3409    tcg_profile_snapshot_table(&prof);
3410    for (i = 0; i < NB_OPS; i++) {
3411        cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
3412                    prof.table_op_count[i]);
3413    }
3414}
3415#else
3416void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3417{
3418    cpu_fprintf(f, "[TCG profiler not compiled]\n");
3419}
3420#endif
3421
3422
3423int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
3424{
3425#ifdef CONFIG_PROFILER
3426    TCGProfile *prof = &s->prof;
3427#endif
3428    int i, num_insns;
3429    TCGOp *op;
3430
3431#ifdef CONFIG_PROFILER
3432    {
3433        int n = 0;
3434
3435        QTAILQ_FOREACH(op, &s->ops, link) {
3436            n++;
3437        }
3438        atomic_set(&prof->op_count, prof->op_count + n);
3439        if (n > prof->op_count_max) {
3440            atomic_set(&prof->op_count_max, n);
3441        }
3442
3443        n = s->nb_temps;
3444        atomic_set(&prof->temp_count, prof->temp_count + n);
3445        if (n > prof->temp_count_max) {
3446            atomic_set(&prof->temp_count_max, n);
3447        }
3448    }
3449#endif
3450
3451#ifdef DEBUG_DISAS
3452    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
3453                 && qemu_log_in_addr_range(tb->pc))) {
3454        qemu_log_lock();
3455        qemu_log("OP:\n");
3456        tcg_dump_ops(s);
3457        qemu_log("\n");
3458        qemu_log_unlock();
3459    }
3460#endif
3461
3462#ifdef CONFIG_PROFILER
3463    atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
3464#endif
3465
3466#ifdef USE_TCG_OPTIMIZATIONS
3467    tcg_optimize(s);
3468#endif
3469
3470#ifdef CONFIG_PROFILER
3471    atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
3472    atomic_set(&prof->la_time, prof->la_time - profile_getclock());
3473#endif
3474
3475    liveness_pass_1(s);
3476
3477    if (s->nb_indirects > 0) {
3478#ifdef DEBUG_DISAS
3479        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
3480                     && qemu_log_in_addr_range(tb->pc))) {
3481            qemu_log_lock();
3482            qemu_log("OP before indirect lowering:\n");
3483            tcg_dump_ops(s);
3484            qemu_log("\n");
3485            qemu_log_unlock();
3486        }
3487#endif
3488        /* Replace indirect temps with direct temps.  */
3489        if (liveness_pass_2(s)) {
3490            /* If changes were made, re-run liveness.  */
3491            liveness_pass_1(s);
3492        }
3493    }
3494
3495#ifdef CONFIG_PROFILER
3496    atomic_set(&prof->la_time, prof->la_time + profile_getclock());
3497#endif
3498
3499#ifdef DEBUG_DISAS
3500    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
3501                 && qemu_log_in_addr_range(tb->pc))) {
3502        qemu_log_lock();
3503        qemu_log("OP after optimization and liveness analysis:\n");
3504        tcg_dump_ops(s);
3505        qemu_log("\n");
3506        qemu_log_unlock();
3507    }
3508#endif
3509
3510    tcg_reg_alloc_start(s);
3511
3512    s->code_buf = tb->tc.ptr;
3513    s->code_ptr = tb->tc.ptr;
3514
3515#ifdef TCG_TARGET_NEED_LDST_LABELS
3516    QSIMPLEQ_INIT(&s->ldst_labels);
3517#endif
3518#ifdef TCG_TARGET_NEED_POOL_LABELS
3519    s->pool_labels = NULL;
3520#endif
3521
3522    num_insns = -1;
3523    QTAILQ_FOREACH(op, &s->ops, link) {
3524        TCGOpcode opc = op->opc;
3525
3526#ifdef CONFIG_PROFILER
3527        atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
3528#endif
3529
3530        switch (opc) {
3531        case INDEX_op_mov_i32:
3532        case INDEX_op_mov_i64:
3533        case INDEX_op_mov_vec:
3534            tcg_reg_alloc_mov(s, op);
3535            break;
3536        case INDEX_op_movi_i32:
3537        case INDEX_op_movi_i64:
3538        case INDEX_op_dupi_vec:
3539            tcg_reg_alloc_movi(s, op);
3540            break;
3541        case INDEX_op_insn_start:
3542            if (num_insns >= 0) {
3543                size_t off = tcg_current_code_size(s);
3544                s->gen_insn_end_off[num_insns] = off;
3545                /* Assert that we do not overflow our stored offset.  */
3546                assert(s->gen_insn_end_off[num_insns] == off);
3547            }
3548            num_insns++;
3549            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
3550                target_ulong a;
3551#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
3552                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
3553#else
3554                a = op->args[i];
3555#endif
3556                s->gen_insn_data[num_insns][i] = a;
3557            }
3558            break;
3559        case INDEX_op_discard:
3560            temp_dead(s, arg_temp(op->args[0]));
3561            break;
3562        case INDEX_op_set_label:
3563            tcg_reg_alloc_bb_end(s, s->reserved_regs);
3564            tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
3565            break;
3566        case INDEX_op_call:
3567            tcg_reg_alloc_call(s, op);
3568            break;
3569        default:
3570            /* Sanity check that we've not introduced any unhandled opcodes. */
3571            tcg_debug_assert(tcg_op_supported(opc));
3572            /* Note: it would be much faster to have specialized
3573               register allocator functions for some common argument
3574               patterns. */
3575            tcg_reg_alloc_op(s, op);
3576            break;
3577        }
3578#ifdef CONFIG_DEBUG_TCG
3579        check_regs(s);
3580#endif
3581        /* Test for (pending) buffer overflow.  The assumption is that any
3582           one operation beginning below the high water mark cannot overrun
3583           the buffer completely.  Thus we can test for overflow after
3584           generating code without having to check during generation.  */
3585        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
3586            return -1;
3587        }
3588    }
3589    tcg_debug_assert(num_insns >= 0);
3590    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
3591
3592    /* Generate TB finalization at the end of block */
3593#ifdef TCG_TARGET_NEED_LDST_LABELS
3594    if (!tcg_out_ldst_finalize(s)) {
3595        return -1;
3596    }
3597#endif
3598#ifdef TCG_TARGET_NEED_POOL_LABELS
3599    if (!tcg_out_pool_finalize(s)) {
3600        return -1;
3601    }
3602#endif
3603
3604    /* flush instruction cache */
3605    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
3606
3607    return tcg_current_code_size(s);
3608}
3609
3610#ifdef CONFIG_PROFILER
3611void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
3612{
3613    TCGProfile prof = {};
3614    const TCGProfile *s;
3615    int64_t tb_count;
3616    int64_t tb_div_count;
3617    int64_t tot;
3618
3619    tcg_profile_snapshot_counters(&prof);
3620    s = &prof;
3621    tb_count = s->tb_count;
3622    tb_div_count = tb_count ? tb_count : 1;
3623    tot = s->interm_time + s->code_time;
3624
3625    cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
3626                tot, tot / 2.4e9);
3627    cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
3628                tb_count, s->tb_count1 - tb_count,
3629                (double)(s->tb_count1 - s->tb_count)
3630                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
3631    cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
3632                (double)s->op_count / tb_div_count, s->op_count_max);
3633    cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
3634                (double)s->del_op_count / tb_div_count);
3635    cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
3636                (double)s->temp_count / tb_div_count, s->temp_count_max);
3637    cpu_fprintf(f, "avg host code/TB    %0.1f\n",
3638                (double)s->code_out_len / tb_div_count);
3639    cpu_fprintf(f, "avg search data/TB  %0.1f\n",
3640                (double)s->search_out_len / tb_div_count);
3641
3642    cpu_fprintf(f, "cycles/op           %0.1f\n",
3643                s->op_count ? (double)tot / s->op_count : 0);
3644    cpu_fprintf(f, "cycles/in byte      %0.1f\n",
3645                s->code_in_len ? (double)tot / s->code_in_len : 0);
3646    cpu_fprintf(f, "cycles/out byte     %0.1f\n",
3647                s->code_out_len ? (double)tot / s->code_out_len : 0);
3648    cpu_fprintf(f, "cycles/search byte  %0.1f\n",
3649                s->search_out_len ? (double)tot / s->search_out_len : 0);
3650    if (tot == 0) {
3651        tot = 1;
3652    }
3653    cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
3654                (double)s->interm_time / tot * 100.0);
3655    cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
3656                (double)s->code_time / tot * 100.0);
3657    cpu_fprintf(f, "optim./code time    %0.1f%%\n",
3658                (double)s->opt_time / (s->code_time ? s->code_time : 1)
3659                * 100.0);
3660    cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
3661                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
3662    cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
3663                s->restore_count);
3664    cpu_fprintf(f, "  avg cycles        %0.1f\n",
3665                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
3666}
3667#else
3668void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
3669{
3670    cpu_fprintf(f, "[TCG profiler not compiled]\n");
3671}
3672#endif
3673
3674#ifdef ELF_HOST_MACHINE
3675/* In order to use this feature, the backend needs to do three things:
3676
3677   (1) Define ELF_HOST_MACHINE to indicate both what value to
3678       put into the ELF image and to indicate support for the feature.
3679
3680   (2) Define tcg_register_jit.  This should create a buffer containing
3681       the contents of a .debug_frame section that describes the post-
3682       prologue unwind info for the tcg machine.
3683
3684   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
3685*/
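/* As an illustration of steps (2) and (3), a hypothetical backend could
   look like the sketch below (kept out of compilation with #if 0).  The
   DebugFrame layout, frame size, and register numbers are invented
   placeholders, not any real host's unwind description; see the
   tcg/<host>/tcg-target.inc.c files for the actual implementations.  */
#if 0
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];     /* defines the CFA after the prologue */
    uint8_t fde_reg_ofs[2];     /* records where the return address lives */
} DebugFrame;

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,                         /* CIE marker in .debug_frame */
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,               /* sleb128 -8 */
    .h.cie.return_column = 16,              /* hypothetical RA column */

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 7,                  /* DW_CFA_def_cfa: register 7 (the sp) */
        0x88, 0x01              /* ... at offset uleb128 0x88 0x01 = 136 */
    },
    .fde_reg_ofs = {
        0x90, 1                 /* DW_CFA_offset: RA saved at cfa-8 */
    },
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif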
3686
3687/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
3688typedef enum {
3689    JIT_NOACTION = 0,
3690    JIT_REGISTER_FN,
3691    JIT_UNREGISTER_FN
3692} jit_actions_t;
3693
3694struct jit_code_entry {
3695    struct jit_code_entry *next_entry;
3696    struct jit_code_entry *prev_entry;
3697    const void *symfile_addr;
3698    uint64_t symfile_size;
3699};
3700
3701struct jit_descriptor {
3702    uint32_t version;
3703    uint32_t action_flag;
3704    struct jit_code_entry *relevant_entry;
3705    struct jit_code_entry *first_entry;
3706};
3707
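/* GDB sets a breakpoint within this empty function; the JIT calls it
   (below, after updating __jit_debug_descriptor) so that an attached
   debugger notices new code.  The noinline attribute and the asm
   statement keep the call and the function from being optimized away.  */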
3708void __jit_debug_register_code(void) __attribute__((noinline));
3709void __jit_debug_register_code(void)
3710{
3711    asm("");
3712}
3713
3714/* Must statically initialize the version, because GDB may check
3715   the version before we can set it.  */
3716struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
3717
3718/* End GDB interface.  */
3719
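/* Return the offset of @str within the string table @strtab.  There is
   no failure path: callers may only pass strings that are known to be
   present in img->str below.  */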
3720static int find_string(const char *strtab, const char *str)
3721{
3722    const char *p = strtab + 1;
3723
3724    while (1) {
3725        if (strcmp(p, str) == 0) {
3726            return p - strtab;
3727        }
3728        p += strlen(p) + 1;
3729    }
3730}
3731
3732static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
3733                                 const void *debug_frame,
3734                                 size_t debug_frame_size)
3735{
3736    struct __attribute__((packed)) DebugInfo {
3737        uint32_t  len;
3738        uint16_t  version;
3739        uint32_t  abbrev;
3740        uint8_t   ptr_size;
3741        uint8_t   cu_die;
3742        uint16_t  cu_lang;
3743        uintptr_t cu_low_pc;
3744        uintptr_t cu_high_pc;
3745        uint8_t   fn_die;
3746        char      fn_name[16];
3747        uintptr_t fn_low_pc;
3748        uintptr_t fn_high_pc;
3749        uint8_t   cu_eoc;
3750    };
3751
3752    struct ElfImage {
3753        ElfW(Ehdr) ehdr;
3754        ElfW(Phdr) phdr;
3755        ElfW(Shdr) shdr[7];
3756        ElfW(Sym)  sym[2];
3757        struct DebugInfo di;
3758        uint8_t    da[24];
3759        char       str[80];
3760    };
3761
3762    struct ElfImage *img;
3763
3764    static const struct ElfImage img_template = {
3765        .ehdr = {
3766            .e_ident[EI_MAG0] = ELFMAG0,
3767            .e_ident[EI_MAG1] = ELFMAG1,
3768            .e_ident[EI_MAG2] = ELFMAG2,
3769            .e_ident[EI_MAG3] = ELFMAG3,
3770            .e_ident[EI_CLASS] = ELF_CLASS,
3771            .e_ident[EI_DATA] = ELF_DATA,
3772            .e_ident[EI_VERSION] = EV_CURRENT,
3773            .e_type = ET_EXEC,
3774            .e_machine = ELF_HOST_MACHINE,
3775            .e_version = EV_CURRENT,
3776            .e_phoff = offsetof(struct ElfImage, phdr),
3777            .e_shoff = offsetof(struct ElfImage, shdr),
3778            .e_ehsize = sizeof(ElfW(Ehdr)),
3779            .e_phentsize = sizeof(ElfW(Phdr)),
3780            .e_phnum = 1,
3781            .e_shentsize = sizeof(ElfW(Shdr)),
3782            .e_shnum = ARRAY_SIZE(img->shdr),
3783            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
3784#ifdef ELF_HOST_FLAGS
3785            .e_flags = ELF_HOST_FLAGS,
3786#endif
3787#ifdef ELF_OSABI
3788            .e_ident[EI_OSABI] = ELF_OSABI,
3789#endif
3790        },
3791        .phdr = {
3792            .p_type = PT_LOAD,
3793            .p_flags = PF_X,
3794        },
3795        .shdr = {
3796            [0] = { .sh_type = SHT_NULL },
3797            /* Trick: The contents of code_gen_buffer are not present in
3798               this fake ELF file; that got allocated elsewhere.  Therefore
3799               we mark .text as SHT_NOBITS (similar to .bss) so that readers
3800               will not look for contents.  We can record any address.  */
3801            [1] = { /* .text */
3802                .sh_type = SHT_NOBITS,
3803                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
3804            },
3805            [2] = { /* .debug_info */
3806                .sh_type = SHT_PROGBITS,
3807                .sh_offset = offsetof(struct ElfImage, di),
3808                .sh_size = sizeof(struct DebugInfo),
3809            },
3810            [3] = { /* .debug_abbrev */
3811                .sh_type = SHT_PROGBITS,
3812                .sh_offset = offsetof(struct ElfImage, da),
3813                .sh_size = sizeof(img->da),
3814            },
3815            [4] = { /* .debug_frame */
3816                .sh_type = SHT_PROGBITS,
3817                .sh_offset = sizeof(struct ElfImage),
3818            },
3819            [5] = { /* .symtab */
3820                .sh_type = SHT_SYMTAB,
3821                .sh_offset = offsetof(struct ElfImage, sym),
3822                .sh_size = sizeof(img->sym),
3823                .sh_info = 1,
3824                .sh_link = ARRAY_SIZE(img->shdr) - 1,
3825                .sh_entsize = sizeof(ElfW(Sym)),
3826            },
3827            [6] = { /* .strtab */
3828                .sh_type = SHT_STRTAB,
3829                .sh_offset = offsetof(struct ElfImage, str),
3830                .sh_size = sizeof(img->str),
3831            }
3832        },
3833        .sym = {
3834            [1] = { /* code_gen_buffer */
3835                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
3836                .st_shndx = 1,
3837            }
3838        },
3839        .di = {
3840            .len = sizeof(struct DebugInfo) - 4,
3841            .version = 2,
3842            .ptr_size = sizeof(void *),
3843            .cu_die = 1,
3844            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
3845            .fn_die = 2,
3846            .fn_name = "code_gen_buffer"
3847        },
3848        .da = {
3849            1,          /* abbrev number (the cu) */
3850            0x11, 1,    /* DW_TAG_compile_unit, has children */
3851            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
3852            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
3853            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
3854            0, 0,       /* end of abbrev */
3855            2,          /* abbrev number (the fn) */
3856            0x2e, 0,    /* DW_TAG_subprogram, no children */
3857            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
3858            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
3859            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
3860            0, 0,       /* end of abbrev */
3861            0           /* no more abbrev */
3862        },
3863        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
3864               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
3865    };
3866
3867    /* We only need a single jit entry; statically allocate it.  */
3868    static struct jit_code_entry one_entry;
3869
3870    uintptr_t buf = (uintptr_t)buf_ptr;
3871    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
3872    DebugFrameHeader *dfh;
3873
3874    img = g_malloc(img_size);
3875    *img = img_template;
3876
3877    img->phdr.p_vaddr = buf;
3878    img->phdr.p_paddr = buf;
3879    img->phdr.p_memsz = buf_size;
3880
3881    img->shdr[1].sh_name = find_string(img->str, ".text");
3882    img->shdr[1].sh_addr = buf;
3883    img->shdr[1].sh_size = buf_size;
3884
3885    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
3886    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
3887
3888    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
3889    img->shdr[4].sh_size = debug_frame_size;
3890
3891    img->shdr[5].sh_name = find_string(img->str, ".symtab");
3892    img->shdr[6].sh_name = find_string(img->str, ".strtab");
3893
3894    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
3895    img->sym[1].st_value = buf;
3896    img->sym[1].st_size = buf_size;
3897
3898    img->di.cu_low_pc = buf;
3899    img->di.cu_high_pc = buf + buf_size;
3900    img->di.fn_low_pc = buf;
3901    img->di.fn_high_pc = buf + buf_size;
3902
3903    dfh = (DebugFrameHeader *)(img + 1);
3904    memcpy(dfh, debug_frame, debug_frame_size);
3905    dfh->fde.func_start = buf;
3906    dfh->fde.func_len = buf_size;
3907
3908#ifdef DEBUG_JIT
3909    /* Enable this block to debug the creation of the ELF image file.
3910       One can use readelf, objdump, or other inspection utilities.  */
3911    {
3912        FILE *f = fopen("/tmp/qemu.jit", "w+b");
3913        if (f) {
3914            if (fwrite(img, img_size, 1, f) != 1) {
3915                /* Consume fwrite's return to silence -Wunused-result. */
3916            }
3917            fclose(f);
3918        }
3919    }
3920#endif
3921
3922    one_entry.symfile_addr = img;
3923    one_entry.symfile_size = img_size;
3924
3925    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
3926    __jit_debug_descriptor.relevant_entry = &one_entry;
3927    __jit_debug_descriptor.first_entry = &one_entry;
3928    __jit_debug_register_code();
3929}
3930#else
3931/* No support for the feature.  Provide the entry point expected by exec.c,
3932   and implement the internal function we declared earlier.  */
3933
3934static void tcg_register_jit_int(void *buf, size_t size,
3935                                 const void *debug_frame,
3936                                 size_t debug_frame_size)
3937{
3938}
3939
3940void tcg_register_jit(void *buf, size_t buf_size)
3941{
3942}
3943#endif /* ELF_HOST_MACHINE */
3944
3945#if !TCG_TARGET_MAYBE_vec
3946void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
3947{
3948    g_assert_not_reached();
3949}
3950#endif
3951