/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/timer.h"

/* Note: the long-term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions. */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#include "tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here. */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
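
/*
 * Illustrative sketch only: each host backend (tcg-target.inc.c) typically
 * wraps these headers in its own DebugFrame struct, appending the DWARF
 * call-frame opcodes that describe its prologue; field names and sizes here
 * are hypothetical and vary per host:
 *
 *     typedef struct {
 *         DebugFrameHeader h;
 *         uint8_t fde_def_cfa[4];   // DW_CFA_def_cfa for the stack pointer
 *         uint8_t fde_reg_ofs[14];  // DW_CFA_offset per callee-saved reg
 *     } DebugFrame;
 */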

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.inc.c. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static bool tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
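
/*
 * Resulting layout of code_gen_buffer (sketch; G = one guard page):
 *
 *   .start                                                        .end
 *     |-- region 0 --|G|-- region 1 --|G| ... |-- region n-1 + slack --|G|
 *                    |<---- .size --->|
 *                    |<------ .stride ---->|
 *
 * The first region also absorbs the bytes needed to page-align .start;
 * any leftover pages at the end go to the last region (see
 * tcg_region_init() below).
 */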

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
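
/*
 * Backends emit and patch code exclusively through the helpers above; a
 * hypothetical sketch of their use in tcg-target.inc.c:
 *
 *     tcg_out32(s, encode_insn(...));   // append one 32-bit unit at code_ptr
 *     tcg_patch32(label_ptr, insn);     // rewrite a previously emitted unit
 *
 * encode_insn() stands in for whatever instruction encoder the host defines.
 */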

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r;

    if (l->has_value) {
        /* FIXME: This may break relocations on RISC targets that
           modify instruction fields in place.  The caller may not have
           written the initial value.  */
        patch_reloc(code_ptr, type, l->u.value, addend);
    } else {
        /* add a new relocation entry */
        r = tcg_malloc(sizeof(TCGRelocation));
        r->type = type;
        r->ptr = code_ptr;
        r->addend = addend;
        r->next = l->u.first_reloc;
        l->u.first_reloc = r;
    }
}
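
/*
 * A backend's branch emitter would typically use this as follows
 * (hypothetical sketch; R_HOST_BRANCH stands for a host relocation type):
 *
 *     if (l->has_value) {
 *         // label already resolved: emit the branch directly
 *     } else {
 *         tcg_out_reloc(s, s->code_ptr, R_HOST_BRANCH, l, 0);
 *         // emit a placeholder branch to be patched later
 *     }
 *
 * tcg_out_label() below performs the deferred patching once the label's
 * address is known.
 */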

static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
{
    intptr_t value = (intptr_t)ptr;
    TCGRelocation *r;

    tcg_debug_assert(!l->has_value);

    for (r = l->u.first_reloc; r != NULL; r = r->next) {
        patch_reloc(r->ptr, r->type, value, r->addend);
    }

    l->has_value = 1;
    l->u.value_ptr = ptr;
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    *l = (TCGLabel){
        .id = s->nb_labels++
    };

    return l;
}

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    size_t off = tcg_current_code_size(s);
    s->tb_jmp_reset_offset[which] = off;
    /* Make sure that we didn't overflow the stored offset.  */
    assert(s->tb_jmp_reset_offset[which] == off);
}

#include "tcg-target.inc.c"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * In a lookup, exactly one of the two .size fields is set to 0.
     * From the glib sources we see that @ap is always the lookup key. However,
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}
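
/*
 * Example: tcg_tb_lookup() below passes a key with .size == 0 and only
 * .ptr set to a host PC; the comparator then reduces to the range check
 * in ptr_cmp_tb_tc(), matching the unique stored [ptr, ptr + size)
 * interval that contains that PC.
 */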

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
{
    size_t region_idx;

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = atomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
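
/*
 * Worked example with hypothetical numbers: given a 512 MB code_gen_buffer
 * and max_cpus == 8, the first iteration (i == 8) yields
 * 512 MB / (8 * 8) = 8 MB >= 2 MB, so we settle on 64 regions.
 */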

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
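
/*
 * Example of the bounds math above, with hypothetical 4 KiB pages: if buf
 * is already page-aligned and size covers 34 pages with n_regions == 2,
 * then region_size is 17 pages, i.e. 16 pages of code plus 1 guard page
 * per region, and region 1's guard doubles as the buffer's final guard
 * page carved off region.end.
 */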

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = atomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < max_cpus);
    atomic_set(&tcg_ctxs[n], s);

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
        size_t size;

        size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}
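
/*
 * E.g. with 64 regions this deducts 64 guard pages plus 64 * TCG_HIGHWATER
 * bytes of slack from the raw buffer span, approximating the largest value
 * tcg_code_size() can ever report.
 */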

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = atomic_read(&tcg_ctxs[i]);

        total += atomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
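
/*
 * Typical lifecycle (illustrative): per-TB scratch data is bump-allocated
 * and then released wholesale at TB boundaries:
 *
 *     TCGRelocation *r = tcg_malloc(sizeof(*r));  // O(1) pointer bump
 *     ...
 *     tcg_pool_reset(s);  // frees only the large chunks; small chunks are
 *                         // retained and rewound for reuse
 */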

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    atomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
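
/*
 * Resulting layout (sketch; '|' marks an icache line boundary):
 *
 *     ...prev code | TranslationBlock, pad | translated code...
 *
 * so the TB's frequently written fields do not share cache lines with the
 * code they describe.
 */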

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;
    s->code_gen_prologue = buf0;

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        bool ok = tcg_out_pool_finalize(s);
        tcg_debug_assert(ok);
    }
#endif

    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(s->code_gen_buffer, total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock();
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(s->code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
}

static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->temp_global = 1;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
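
/*
 * Front ends reach this through the typed wrappers in tcg.h; illustrative
 * lifecycle:
 *
 *     TCGv_i32 t = tcg_temp_new_i32();  // may recycle an index from
 *                                       // free_temps via the path above
 *     tcg_gen_movi_i32(t, 0);
 *     ...
 *     tcg_temp_free_i32(t);             // sets the bit again for reuse
 */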

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_global == 0);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupi_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;

    default:
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;
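
    /*
     * sizemask packs two bits per value (built by the dh_sizemask macros in
     * exec/helper-head.h, as far as the layout goes): bits [1:0] describe
     * the return value and bits [2i+3:2i+2] argument i, where the low bit
     * of each pair means "is 64-bit" and the high bit "is signed".  E.g. a
     * helper returning i64 with one signed i32 argument has sizemask 0b1001.
     */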
1628
1629#if defined(__sparc__) && !defined(__arch64__) \
1630    && !defined(CONFIG_TCG_INTERPRETER)
1631    /* We have 64-bit values in one register, but need to pass as two
1632       separate parameters.  Split them.  */
1633    int orig_sizemask = sizemask;
1634    int orig_nargs = nargs;
1635    TCGv_i64 retl, reth;
1636    TCGTemp *split_args[MAX_OPC_PARAM];
1637
1638    retl = NULL;
1639    reth = NULL;
1640    if (sizemask != 0) {
1641        for (i = real_args = 0; i < nargs; ++i) {
1642            int is_64bit = sizemask & (1 << (i+1)*2);
1643            if (is_64bit) {
1644                TCGv_i64 orig = temp_tcgv_i64(args[i]);
1645                TCGv_i32 h = tcg_temp_new_i32();
1646                TCGv_i32 l = tcg_temp_new_i32();
1647                tcg_gen_extr_i64_i32(l, h, orig);
1648                split_args[real_args++] = tcgv_i32_temp(h);
1649                split_args[real_args++] = tcgv_i32_temp(l);
1650            } else {
1651                split_args[real_args++] = args[i];
1652            }
1653        }
1654        nargs = real_args;
1655        args = split_args;
1656        sizemask = 0;
1657    }
1658#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1659    for (i = 0; i < nargs; ++i) {
1660        int is_64bit = sizemask & (1 << (i+1)*2);
1661        int is_signed = sizemask & (2 << (i+1)*2);
1662        if (!is_64bit) {
1663            TCGv_i64 temp = tcg_temp_new_i64();
1664            TCGv_i64 orig = temp_tcgv_i64(args[i]);
1665            if (is_signed) {
1666                tcg_gen_ext32s_i64(temp, orig);
1667            } else {
1668                tcg_gen_ext32u_i64(temp, orig);
1669            }
1670            args[i] = tcgv_i64_temp(temp);
1671        }
1672    }
1673#endif /* TCG_TARGET_EXTEND_ARGS */
1674
1675    op = tcg_emit_op(INDEX_op_call);
1676
1677    pi = 0;
1678    if (ret != NULL) {
1679#if defined(__sparc__) && !defined(__arch64__) \
1680    && !defined(CONFIG_TCG_INTERPRETER)
1681        if (orig_sizemask & 1) {
1682            /* The 32-bit ABI is going to return the 64-bit value in
1683               the %o0/%o1 register pair.  Prepare for this by using
1684               two return temporaries, and reassemble below.  */
1685            retl = tcg_temp_new_i64();
1686            reth = tcg_temp_new_i64();
1687            op->args[pi++] = tcgv_i64_arg(reth);
1688            op->args[pi++] = tcgv_i64_arg(retl);
1689            nb_rets = 2;
1690        } else {
1691            op->args[pi++] = temp_arg(ret);
1692            nb_rets = 1;
1693        }
1694#else
1695        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1696#ifdef HOST_WORDS_BIGENDIAN
1697            op->args[pi++] = temp_arg(ret + 1);
1698            op->args[pi++] = temp_arg(ret);
1699#else
1700            op->args[pi++] = temp_arg(ret);
1701            op->args[pi++] = temp_arg(ret + 1);
1702#endif
1703            nb_rets = 2;
1704        } else {
1705            op->args[pi++] = temp_arg(ret);
1706            nb_rets = 1;
1707        }
1708#endif
1709    } else {
1710        nb_rets = 0;
1711    }
1712    TCGOP_CALLO(op) = nb_rets;
1713
1714    real_args = 0;
1715    for (i = 0; i < nargs; i++) {
1716        int is_64bit = sizemask & (1 << (i+1)*2);
1717        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1718#ifdef TCG_TARGET_CALL_ALIGN_ARGS
1719            /* some targets want aligned 64 bit args */
1720            if (real_args & 1) {
1721                op->args[pi++] = TCG_CALL_DUMMY_ARG;
1722                real_args++;
1723            }
1724#endif
1725           /* If stack grows up, then we will be placing successive
1726              arguments at lower addresses, which means we need to
1727              reverse the order compared to how we would normally
1728              treat either big or little-endian.  For those arguments
1729              that will wind up in registers, this still works for
1730              HPPA (the only current STACK_GROWSUP target) since the
1731              argument registers are *also* allocated in decreasing
1732              order.  If another such target is added, this logic may
1733              have to get more complicated to differentiate between
1734              stack arguments and register arguments.  */
1735#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1736            op->args[pi++] = temp_arg(args[i] + 1);
1737            op->args[pi++] = temp_arg(args[i]);
1738#else
1739            op->args[pi++] = temp_arg(args[i]);
1740            op->args[pi++] = temp_arg(args[i] + 1);
1741#endif
1742            real_args += 2;
1743            continue;
1744        }
1745
1746        op->args[pi++] = temp_arg(args[i]);
1747        real_args++;
1748    }
1749    op->args[pi++] = (uintptr_t)func;
1750    op->args[pi++] = flags;
1751    TCGOP_CALLI(op) = real_args;
1752
1753    /* Make sure the fields didn't overflow.  */
1754    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1755    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1756
1757#if defined(__sparc__) && !defined(__arch64__) \
1758    && !defined(CONFIG_TCG_INTERPRETER)
1759    /* Free all of the parts we allocated above.  */
1760    for (i = real_args = 0; i < orig_nargs; ++i) {
1761        int is_64bit = orig_sizemask & (1 << (i+1)*2);
1762        if (is_64bit) {
1763            tcg_temp_free_internal(args[real_args++]);
1764            tcg_temp_free_internal(args[real_args++]);
1765        } else {
1766            real_args++;
1767        }
1768    }
1769    if (orig_sizemask & 1) {
1770        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1771           Note that describing these as TCGv_i64 eliminates an unnecessary
1772           zero-extension that tcg_gen_concat_i32_i64 would create.  */
1773        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1774        tcg_temp_free_i64(retl);
1775        tcg_temp_free_i64(reth);
1776    }
1777#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1778    for (i = 0; i < nargs; ++i) {
1779        int is_64bit = sizemask & (1 << (i+1)*2);
1780        if (!is_64bit) {
1781            tcg_temp_free_internal(args[i]);
1782        }
1783    }
1784#endif /* TCG_TARGET_EXTEND_ARGS */
1785}
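
/*
 * Illustration, not part of the original source: the sizemask layout
 * decoded by the code above.  Bit 0 flags a 64-bit return value; for
 * argument i, bit (i+1)*2 flags a 64-bit argument and bit (i+1)*2 + 1
 * flags a signed one.  A minimal sketch of the encoding side (helper
 * name hypothetical):
 */
static inline int example_call_sizemask(bool ret_is_64bit, int nargs,
                                        const bool *is_64bit,
                                        const bool *is_signed)
{
    int i, sizemask = ret_is_64bit ? 1 : 0;   /* bit 0: 64-bit return */

    for (i = 0; i < nargs; i++) {
        if (is_64bit[i]) {
            sizemask |= 1 << ((i + 1) * 2);   /* matches the tests above */
        }
        if (is_signed[i]) {
            sizemask |= 2 << ((i + 1) * 2);
        }
    }
    return sizemask;
}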
1786
1787static void tcg_reg_alloc_start(TCGContext *s)
1788{
1789    int i, n;
1790    TCGTemp *ts;
1791
1792    for (i = 0, n = s->nb_globals; i < n; i++) {
1793        ts = &s->temps[i];
1794        ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1795    }
1796    for (n = s->nb_temps; i < n; i++) {
1797        ts = &s->temps[i];
1798        ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1799        ts->mem_allocated = 0;
1800        ts->fixed_reg = 0;
1801    }
1802
1803    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1804}
1805
1806static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1807                                 TCGTemp *ts)
1808{
1809    int idx = temp_idx(ts);
1810
1811    if (ts->temp_global) {
1812        pstrcpy(buf, buf_size, ts->name);
1813    } else if (ts->temp_local) {
1814        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1815    } else {
1816        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1817    }
1818    return buf;
1819}
1820
1821static char *tcg_get_arg_str(TCGContext *s, char *buf,
1822                             int buf_size, TCGArg arg)
1823{
1824    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1825}
1826
1827/* Find helper name.  */
1828static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1829{
1830    const char *ret = NULL;
1831    if (helper_table) {
1832        TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1833        if (info) {
1834            ret = info->name;
1835        }
1836    }
1837    return ret;
1838}
1839
1840static const char * const cond_name[] =
1841{
1842    [TCG_COND_NEVER] = "never",
1843    [TCG_COND_ALWAYS] = "always",
1844    [TCG_COND_EQ] = "eq",
1845    [TCG_COND_NE] = "ne",
1846    [TCG_COND_LT] = "lt",
1847    [TCG_COND_GE] = "ge",
1848    [TCG_COND_LE] = "le",
1849    [TCG_COND_GT] = "gt",
1850    [TCG_COND_LTU] = "ltu",
1851    [TCG_COND_GEU] = "geu",
1852    [TCG_COND_LEU] = "leu",
1853    [TCG_COND_GTU] = "gtu"
1854};
1855
1856static const char * const ldst_name[] =
1857{
1858    [MO_UB]   = "ub",
1859    [MO_SB]   = "sb",
1860    [MO_LEUW] = "leuw",
1861    [MO_LESW] = "lesw",
1862    [MO_LEUL] = "leul",
1863    [MO_LESL] = "lesl",
1864    [MO_LEQ]  = "leq",
1865    [MO_BEUW] = "beuw",
1866    [MO_BESW] = "besw",
1867    [MO_BEUL] = "beul",
1868    [MO_BESL] = "besl",
1869    [MO_BEQ]  = "beq",
1870};
1871
1872static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1873#ifdef ALIGNED_ONLY
1874    [MO_UNALN >> MO_ASHIFT]    = "un+",
1875    [MO_ALIGN >> MO_ASHIFT]    = "",
1876#else
1877    [MO_UNALN >> MO_ASHIFT]    = "",
1878    [MO_ALIGN >> MO_ASHIFT]    = "al+",
1879#endif
1880    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1881    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1882    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1883    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1884    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1885    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1886};
1887
1888void tcg_dump_ops(TCGContext *s)
1889{
1890    char buf[128];
1891    TCGOp *op;
1892
1893    QTAILQ_FOREACH(op, &s->ops, link) {
1894        int i, k, nb_oargs, nb_iargs, nb_cargs;
1895        const TCGOpDef *def;
1896        TCGOpcode c;
1897        int col = 0;
1898
1899        c = op->opc;
1900        def = &tcg_op_defs[c];
1901
1902        if (c == INDEX_op_insn_start) {
1903            col += qemu_log("\n ----");
1904
1905            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1906                target_ulong a;
1907#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1908                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1909#else
1910                a = op->args[i];
1911#endif
1912                col += qemu_log(" " TARGET_FMT_lx, a);
1913            }
1914        } else if (c == INDEX_op_call) {
1915            /* variable number of arguments */
1916            nb_oargs = TCGOP_CALLO(op);
1917            nb_iargs = TCGOP_CALLI(op);
1918            nb_cargs = def->nb_cargs;
1919
1920            /* function name, flags, out args */
1921            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1922                            tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1923                            op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1924            for (i = 0; i < nb_oargs; i++) {
1925                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1926                                                       op->args[i]));
1927            }
1928            for (i = 0; i < nb_iargs; i++) {
1929                TCGArg arg = op->args[nb_oargs + i];
1930                const char *t = "<dummy>";
1931                if (arg != TCG_CALL_DUMMY_ARG) {
1932                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1933                }
1934                col += qemu_log(",%s", t);
1935            }
1936        } else {
1937            col += qemu_log(" %s ", def->name);
1938
1939            nb_oargs = def->nb_oargs;
1940            nb_iargs = def->nb_iargs;
1941            nb_cargs = def->nb_cargs;
1942
1943            if (def->flags & TCG_OPF_VECTOR) {
1944                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1945                                8 << TCGOP_VECE(op));
1946            }
1947
1948            k = 0;
1949            for (i = 0; i < nb_oargs; i++) {
1950                if (k != 0) {
1951                    col += qemu_log(",");
1952                }
1953                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1954                                                      op->args[k++]));
1955            }
1956            for (i = 0; i < nb_iargs; i++) {
1957                if (k != 0) {
1958                    col += qemu_log(",");
1959                }
1960                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1961                                                      op->args[k++]));
1962            }
1963            switch (c) {
1964            case INDEX_op_brcond_i32:
1965            case INDEX_op_setcond_i32:
1966            case INDEX_op_movcond_i32:
1967            case INDEX_op_brcond2_i32:
1968            case INDEX_op_setcond2_i32:
1969            case INDEX_op_brcond_i64:
1970            case INDEX_op_setcond_i64:
1971            case INDEX_op_movcond_i64:
1972            case INDEX_op_cmp_vec:
1973                if (op->args[k] < ARRAY_SIZE(cond_name)
1974                    && cond_name[op->args[k]]) {
1975                    col += qemu_log(",%s", cond_name[op->args[k++]]);
1976                } else {
1977                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1978                }
1979                i = 1;
1980                break;
1981            case INDEX_op_qemu_ld_i32:
1982            case INDEX_op_qemu_st_i32:
1983            case INDEX_op_qemu_ld_i64:
1984            case INDEX_op_qemu_st_i64:
1985                {
1986                    TCGMemOpIdx oi = op->args[k++];
1987                    TCGMemOp op = get_memop(oi);
1988                    unsigned ix = get_mmuidx(oi);
1989
1990                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1991                        col += qemu_log(",$0x%x,%u", op, ix);
1992                    } else {
1993                        const char *s_al, *s_op;
1994                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1995                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1996                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1997                    }
1998                    i = 1;
1999                }
2000                break;
2001            default:
2002                i = 0;
2003                break;
2004            }
2005            switch (c) {
2006            case INDEX_op_set_label:
2007            case INDEX_op_br:
2008            case INDEX_op_brcond_i32:
2009            case INDEX_op_brcond_i64:
2010            case INDEX_op_brcond2_i32:
2011                col += qemu_log("%s$L%d", k ? "," : "",
2012                                arg_label(op->args[k])->id);
2013                i++, k++;
2014                break;
2015            default:
2016                break;
2017            }
2018            for (; i < nb_cargs; i++, k++) {
2019                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2020            }
2021        }
2022        if (op->life) {
2023            unsigned life = op->life;
2024
2025            for (; col < 48; ++col) {
2026                putc(' ', qemu_logfile);
2027            }
2028
2029            if (life & (SYNC_ARG * 3)) {
2030                qemu_log("  sync:");
2031                for (i = 0; i < 2; ++i) {
2032                    if (life & (SYNC_ARG << i)) {
2033                        qemu_log(" %d", i);
2034                    }
2035                }
2036            }
2037            life /= DEAD_ARG;
2038            if (life) {
2039                qemu_log("  dead:");
2040                for (i = 0; life; ++i, life >>= 1) {
2041                    if (life & 1) {
2042                        qemu_log(" %d", i);
2043                    }
2044                }
2045            }
2046        }
2047        qemu_log("\n");
2048    }
2049}
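
/*
 * Illustration with hypothetical temp names: given the formatting above,
 * a dumped three-operand op whose inputs both die might print as
 *
 *     add_i32 tmp2,tmp0,tmp1                        dead: 1 2
 *
 * i.e. opcode, comma-separated output then input args, then the liveness
 * annotations padded out to column 48: "sync:" lists outputs that must be
 * written back to memory, "dead:" the argument indexes that die here.
 */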
2050
2051/* we give more priority to constraints with fewer registers */
2052static int get_constraint_priority(const TCGOpDef *def, int k)
2053{
2054    const TCGArgConstraint *arg_ct;
2055
2056    int i, n;
2057    arg_ct = &def->args_ct[k];
2058    if (arg_ct->ct & TCG_CT_ALIAS) {
2059        /* an alias is equivalent to a single register */
2060        n = 1;
2061    } else {
2062        if (!(arg_ct->ct & TCG_CT_REG))
2063            return 0;
2064        n = 0;
2065        for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2066            if (tcg_regset_test_reg(arg_ct->u.regs, i))
2067                n++;
2068        }
2069    }
2070    return TCG_TARGET_NB_REGS - n + 1;
2071}
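
/*
 * Worked example, illustrative only: with TCG_TARGET_NB_REGS == 16, an
 * alias counts as a single register and yields 16 - 1 + 1 = 16, the same
 * as a one-register constraint; a constraint accepting all 16 registers
 * yields 16 - 16 + 1 = 1; a non-register constraint yields 0.  The most
 * restrictive constraints therefore sort first below.
 */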
2072
2073/* sort from highest priority to lowest */
2074static void sort_constraints(TCGOpDef *def, int start, int n)
2075{
2076    int i, j, p1, p2, tmp;
2077
2078    for(i = 0; i < n; i++)
2079        def->sorted_args[start + i] = start + i;
2080    if (n <= 1)
2081        return;
2082    for(i = 0; i < n - 1; i++) {
2083        for(j = i + 1; j < n; j++) {
2084            p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2085            p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2086            if (p1 < p2) {
2087                tmp = def->sorted_args[start + i];
2088                def->sorted_args[start + i] = def->sorted_args[start + j];
2089                def->sorted_args[start + j] = tmp;
2090            }
2091        }
2092    }
2093}
2094
2095static void process_op_defs(TCGContext *s)
2096{
2097    TCGOpcode op;
2098
2099    for (op = 0; op < NB_OPS; op++) {
2100        TCGOpDef *def = &tcg_op_defs[op];
2101        const TCGTargetOpDef *tdefs;
2102        TCGType type;
2103        int i, nb_args;
2104
2105        if (def->flags & TCG_OPF_NOT_PRESENT) {
2106            continue;
2107        }
2108
2109        nb_args = def->nb_iargs + def->nb_oargs;
2110        if (nb_args == 0) {
2111            continue;
2112        }
2113
2114        tdefs = tcg_target_op_def(op);
2115        /* Missing TCGTargetOpDef entry. */
2116        tcg_debug_assert(tdefs != NULL);
2117
2118        type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2119        for (i = 0; i < nb_args; i++) {
2120            const char *ct_str = tdefs->args_ct_str[i];
2121            /* Incomplete TCGTargetOpDef entry. */
2122            tcg_debug_assert(ct_str != NULL);
2123
2124            def->args_ct[i].u.regs = 0;
2125            def->args_ct[i].ct = 0;
2126            while (*ct_str != '\0') {
2127                switch(*ct_str) {
2128                case '0' ... '9':
2129                    {
2130                        int oarg = *ct_str - '0';
2131                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2132                        tcg_debug_assert(oarg < def->nb_oargs);
2133                        tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2134                        /* TCG_CT_ALIAS is for the output arguments.
2135                           The input is tagged with TCG_CT_IALIAS. */
2136                        def->args_ct[i] = def->args_ct[oarg];
2137                        def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2138                        def->args_ct[oarg].alias_index = i;
2139                        def->args_ct[i].ct |= TCG_CT_IALIAS;
2140                        def->args_ct[i].alias_index = oarg;
2141                    }
2142                    ct_str++;
2143                    break;
2144                case '&':
2145                    def->args_ct[i].ct |= TCG_CT_NEWREG;
2146                    ct_str++;
2147                    break;
2148                case 'i':
2149                    def->args_ct[i].ct |= TCG_CT_CONST;
2150                    ct_str++;
2151                    break;
2152                default:
2153                    ct_str = target_parse_constraint(&def->args_ct[i],
2154                                                     ct_str, type);
2155                    /* Typo in TCGTargetOpDef constraint. */
2156                    tcg_debug_assert(ct_str != NULL);
2157                }
2158            }
2159        }
2160
2161        /* TCGTargetOpDef entry with too much information? */
2162        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2163
2164        /* sort the constraints (XXX: this is just a heuristic) */
2165        sort_constraints(def, 0, def->nb_oargs);
2166        sort_constraints(def, def->nb_oargs, def->nb_iargs);
2167    }
2168}
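
/*
 * Illustration, a hypothetical backend entry rather than real target
 * code: a two-address add could be described as
 */
static const TCGTargetOpDef example_add_def = {
    INDEX_op_add_i32, { "r", "0", "ri" }
};
/*
 * which the parser above reads as: output 0 in any register ("r"), first
 * input aliased to output 0 via TCG_CT_ALIAS/TCG_CT_IALIAS ("0"), and
 * second input either a register or an immediate, TCG_CT_CONST ("ri").
 */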
2169
2170void tcg_op_remove(TCGContext *s, TCGOp *op)
2171{
2172    QTAILQ_REMOVE(&s->ops, op, link);
2173    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2174    s->nb_ops--;
2175
2176#ifdef CONFIG_PROFILER
2177    atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2178#endif
2179}
2180
2181static TCGOp *tcg_op_alloc(TCGOpcode opc)
2182{
2183    TCGContext *s = tcg_ctx;
2184    TCGOp *op;
2185
2186    if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2187        op = tcg_malloc(sizeof(TCGOp));
2188    } else {
2189        op = QTAILQ_FIRST(&s->free_ops);
2190        QTAILQ_REMOVE(&s->free_ops, op, link);
2191    }
2192    memset(op, 0, offsetof(TCGOp, link));
2193    op->opc = opc;
2194    s->nb_ops++;
2195
2196    return op;
2197}
2198
2199TCGOp *tcg_emit_op(TCGOpcode opc)
2200{
2201    TCGOp *op = tcg_op_alloc(opc);
2202    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2203    return op;
2204}
2205
2206TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
2207                            TCGOpcode opc, int nargs)
2208{
2209    TCGOp *new_op = tcg_op_alloc(opc);
2210    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2211    return new_op;
2212}
2213
2214TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
2215                           TCGOpcode opc, int nargs)
2216{
2217    TCGOp *new_op = tcg_op_alloc(opc);
2218    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2219    return new_op;
2220}
2221
2222#define TS_DEAD  1
2223#define TS_MEM   2
2224
2225#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2226#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2227
2228/* liveness analysis: end of function: all temps are dead, and globals
2229   should be in memory. */
2230static void tcg_la_func_end(TCGContext *s)
2231{
2232    int ng = s->nb_globals;
2233    int nt = s->nb_temps;
2234    int i;
2235
2236    for (i = 0; i < ng; ++i) {
2237        s->temps[i].state = TS_DEAD | TS_MEM;
2238    }
2239    for (i = ng; i < nt; ++i) {
2240        s->temps[i].state = TS_DEAD;
2241    }
2242}
2243
2244/* liveness analysis: end of basic block: all temps are dead, globals
2245   and local temps should be in memory. */
2246static void tcg_la_bb_end(TCGContext *s)
2247{
2248    int ng = s->nb_globals;
2249    int nt = s->nb_temps;
2250    int i;
2251
2252    for (i = 0; i < ng; ++i) {
2253        s->temps[i].state = TS_DEAD | TS_MEM;
2254    }
2255    for (i = ng; i < nt; ++i) {
2256        s->temps[i].state = (s->temps[i].temp_local
2257                             ? TS_DEAD | TS_MEM
2258                             : TS_DEAD);
2259    }
2260}
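
/*
 * Illustration, not from the original source: the per-temp state driven
 * by the two helpers above is a small lattice over TS_DEAD and TS_MEM:
 *
 *     0                  used later; a register copy suffices
 *     TS_MEM             used later, and memory must hold a valid copy
 *     TS_DEAD            not used before its next definition
 *     TS_DEAD | TS_MEM   dead, but its canonical memory slot must be valid
 *
 * tcg_la_func_end() and tcg_la_bb_end() seed the backward walk with the
 * states required at function exit and basic-block boundaries.
 */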
2261
2262/* Liveness analysis: update the opc_arg_life array to tell whether a
2263   given input argument is dead. Instructions updating dead
2264   temporaries are removed. */
2265static void liveness_pass_1(TCGContext *s)
2266{
2267    int nb_globals = s->nb_globals;
2268    TCGOp *op, *op_prev;
2269
2270    tcg_la_func_end(s);
2271
2272    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, TCGOpHead, link, op_prev) {
2273        int i, nb_iargs, nb_oargs;
2274        TCGOpcode opc_new, opc_new2;
2275        bool have_opc_new2;
2276        TCGLifeData arg_life = 0;
2277        TCGTemp *arg_ts;
2278        TCGOpcode opc = op->opc;
2279        const TCGOpDef *def = &tcg_op_defs[opc];
2280
2281        switch (opc) {
2282        case INDEX_op_call:
2283            {
2284                int call_flags;
2285
2286                nb_oargs = TCGOP_CALLO(op);
2287                nb_iargs = TCGOP_CALLI(op);
2288                call_flags = op->args[nb_oargs + nb_iargs + 1];
2289
2290                /* pure functions can be removed if their result is unused */
2291                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2292                    for (i = 0; i < nb_oargs; i++) {
2293                        arg_ts = arg_temp(op->args[i]);
2294                        if (arg_ts->state != TS_DEAD) {
2295                            goto do_not_remove_call;
2296                        }
2297                    }
2298                    goto do_remove;
2299                } else {
2300                do_not_remove_call:
2301
2302                    /* output args are dead */
2303                    for (i = 0; i < nb_oargs; i++) {
2304                        arg_ts = arg_temp(op->args[i]);
2305                        if (arg_ts->state & TS_DEAD) {
2306                            arg_life |= DEAD_ARG << i;
2307                        }
2308                        if (arg_ts->state & TS_MEM) {
2309                            arg_life |= SYNC_ARG << i;
2310                        }
2311                        arg_ts->state = TS_DEAD;
2312                    }
2313
2314                    if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2315                                        TCG_CALL_NO_READ_GLOBALS))) {
2316                        /* globals should go back to memory */
2317                        for (i = 0; i < nb_globals; i++) {
2318                            s->temps[i].state = TS_DEAD | TS_MEM;
2319                        }
2320                    } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2321                        /* globals should be synced to memory */
2322                        for (i = 0; i < nb_globals; i++) {
2323                            s->temps[i].state |= TS_MEM;
2324                        }
2325                    }
2326
2327                    /* record arguments that die in this helper */
2328                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2329                        arg_ts = arg_temp(op->args[i]);
2330                        if (arg_ts && arg_ts->state & TS_DEAD) {
2331                            arg_life |= DEAD_ARG << i;
2332                        }
2333                    }
2334                    /* input arguments are live for preceding opcodes */
2335                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2336                        arg_ts = arg_temp(op->args[i]);
2337                        if (arg_ts) {
2338                            arg_ts->state &= ~TS_DEAD;
2339                        }
2340                    }
2341                }
2342            }
2343            break;
2344        case INDEX_op_insn_start:
2345            break;
2346        case INDEX_op_discard:
2347            /* mark the temporary as dead */
2348            arg_temp(op->args[0])->state = TS_DEAD;
2349            break;
2350
2351        case INDEX_op_add2_i32:
2352            opc_new = INDEX_op_add_i32;
2353            goto do_addsub2;
2354        case INDEX_op_sub2_i32:
2355            opc_new = INDEX_op_sub_i32;
2356            goto do_addsub2;
2357        case INDEX_op_add2_i64:
2358            opc_new = INDEX_op_add_i64;
2359            goto do_addsub2;
2360        case INDEX_op_sub2_i64:
2361            opc_new = INDEX_op_sub_i64;
2362        do_addsub2:
2363            nb_iargs = 4;
2364            nb_oargs = 2;
2365            /* Test if the high part of the operation is dead, but not
2366               the low part.  The result can be optimized to a simple
2367               add or sub.  This happens often for x86_64 guest when the
2368               cpu mode is set to 32 bit.  */
2369            if (arg_temp(op->args[1])->state == TS_DEAD) {
2370                if (arg_temp(op->args[0])->state == TS_DEAD) {
2371                    goto do_remove;
2372                }
2373                /* Replace the opcode and adjust the args in place,
2374                   leaving 3 unused args at the end.  */
2375                op->opc = opc = opc_new;
2376                op->args[1] = op->args[2];
2377                op->args[2] = op->args[4];
2378                /* Fall through and mark the single-word operation live.  */
2379                nb_iargs = 2;
2380                nb_oargs = 1;
2381            }
2382            goto do_not_remove;
2383
2384        case INDEX_op_mulu2_i32:
2385            opc_new = INDEX_op_mul_i32;
2386            opc_new2 = INDEX_op_muluh_i32;
2387            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2388            goto do_mul2;
2389        case INDEX_op_muls2_i32:
2390            opc_new = INDEX_op_mul_i32;
2391            opc_new2 = INDEX_op_mulsh_i32;
2392            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2393            goto do_mul2;
2394        case INDEX_op_mulu2_i64:
2395            opc_new = INDEX_op_mul_i64;
2396            opc_new2 = INDEX_op_muluh_i64;
2397            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2398            goto do_mul2;
2399        case INDEX_op_muls2_i64:
2400            opc_new = INDEX_op_mul_i64;
2401            opc_new2 = INDEX_op_mulsh_i64;
2402            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2403            goto do_mul2;
2404        do_mul2:
2405            nb_iargs = 2;
2406            nb_oargs = 2;
2407            if (arg_temp(op->args[1])->state == TS_DEAD) {
2408                if (arg_temp(op->args[0])->state == TS_DEAD) {
2409                    /* Both parts of the operation are dead.  */
2410                    goto do_remove;
2411                }
2412                /* The high part of the operation is dead; generate the low. */
2413                op->opc = opc = opc_new;
2414                op->args[1] = op->args[2];
2415                op->args[2] = op->args[3];
2416            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2417                /* The low part of the operation is dead; generate the high. */
2418                op->opc = opc = opc_new2;
2419                op->args[0] = op->args[1];
2420                op->args[1] = op->args[2];
2421                op->args[2] = op->args[3];
2422            } else {
2423                goto do_not_remove;
2424            }
2425            /* Mark the single-word operation live.  */
2426            nb_oargs = 1;
2427            goto do_not_remove;
2428
2429        default:
2430            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2431            nb_iargs = def->nb_iargs;
2432            nb_oargs = def->nb_oargs;
2433
2434            /* Test if the operation can be removed because all
2435               its outputs are dead. We assume that nb_oargs == 0
2436               implies side effects */
2437            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2438                for (i = 0; i < nb_oargs; i++) {
2439                    if (arg_temp(op->args[i])->state != TS_DEAD) {
2440                        goto do_not_remove;
2441                    }
2442                }
2443            do_remove:
2444                tcg_op_remove(s, op);
2445            } else {
2446            do_not_remove:
2447                /* output args are dead */
2448                for (i = 0; i < nb_oargs; i++) {
2449                    arg_ts = arg_temp(op->args[i]);
2450                    if (arg_ts->state & TS_DEAD) {
2451                        arg_life |= DEAD_ARG << i;
2452                    }
2453                    if (arg_ts->state & TS_MEM) {
2454                        arg_life |= SYNC_ARG << i;
2455                    }
2456                    arg_ts->state = TS_DEAD;
2457                }
2458
2459                /* if end of basic block, update */
2460                if (def->flags & TCG_OPF_BB_END) {
2461                    tcg_la_bb_end(s);
2462                } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2463                    /* globals should be synced to memory */
2464                    for (i = 0; i < nb_globals; i++) {
2465                        s->temps[i].state |= TS_MEM;
2466                    }
2467                }
2468
2469                /* record arguments that die in this opcode */
2470                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2471                    arg_ts = arg_temp(op->args[i]);
2472                    if (arg_ts->state & TS_DEAD) {
2473                        arg_life |= DEAD_ARG << i;
2474                    }
2475                }
2476                /* input arguments are live for preceding opcodes */
2477                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2478                    arg_temp(op->args[i])->state &= ~TS_DEAD;
2479                }
2480            }
2481            break;
2482        }
2483        op->life = arg_life;
2484    }
2485}
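
/*
 * Worked example, derived from the do_addsub2 rewrite above: when only
 * the high half of a double-word add is dead,
 *
 *     add2_i32 lo, hi, al, ah, bl, bh        (hi dead, lo live)
 *
 * is rewritten in place to
 *
 *     add_i32 lo, al, bl
 *
 * by replacing the opcode and compacting args[1]/args[2]; the three
 * trailing args simply become unused.
 */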
2486
2487/* Liveness analysis: Convert indirect regs to direct temporaries.  */
2488static bool liveness_pass_2(TCGContext *s)
2489{
2490    int nb_globals = s->nb_globals;
2491    int nb_temps, i;
2492    bool changes = false;
2493    TCGOp *op, *op_next;
2494
2495    /* Create a temporary for each indirect global.  */
2496    for (i = 0; i < nb_globals; ++i) {
2497        TCGTemp *its = &s->temps[i];
2498        if (its->indirect_reg) {
2499            TCGTemp *dts = tcg_temp_alloc(s);
2500            dts->type = its->type;
2501            dts->base_type = its->base_type;
2502            its->state_ptr = dts;
2503        } else {
2504            its->state_ptr = NULL;
2505        }
2506        /* All globals begin dead.  */
2507        its->state = TS_DEAD;
2508    }
2509    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2510        TCGTemp *its = &s->temps[i];
2511        its->state_ptr = NULL;
2512        its->state = TS_DEAD;
2513    }
2514
2515    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2516        TCGOpcode opc = op->opc;
2517        const TCGOpDef *def = &tcg_op_defs[opc];
2518        TCGLifeData arg_life = op->life;
2519        int nb_iargs, nb_oargs, call_flags;
2520        TCGTemp *arg_ts, *dir_ts;
2521
2522        if (opc == INDEX_op_call) {
2523            nb_oargs = TCGOP_CALLO(op);
2524            nb_iargs = TCGOP_CALLI(op);
2525            call_flags = op->args[nb_oargs + nb_iargs + 1];
2526        } else {
2527            nb_iargs = def->nb_iargs;
2528            nb_oargs = def->nb_oargs;
2529
2530            /* Set flags similar to how calls require.  */
2531            if (def->flags & TCG_OPF_BB_END) {
2532                /* Like writing globals: save_globals */
2533                call_flags = 0;
2534            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2535                /* Like reading globals: sync_globals */
2536                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2537            } else {
2538                /* No effect on globals.  */
2539                call_flags = (TCG_CALL_NO_READ_GLOBALS |
2540                              TCG_CALL_NO_WRITE_GLOBALS);
2541            }
2542        }
2543
2544        /* Make sure that input arguments are available.  */
2545        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2546            arg_ts = arg_temp(op->args[i]);
2547            if (arg_ts) {
2548                dir_ts = arg_ts->state_ptr;
2549                if (dir_ts && arg_ts->state == TS_DEAD) {
2550                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2551                                      ? INDEX_op_ld_i32
2552                                      : INDEX_op_ld_i64);
2553                    TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
2554
2555                    lop->args[0] = temp_arg(dir_ts);
2556                    lop->args[1] = temp_arg(arg_ts->mem_base);
2557                    lop->args[2] = arg_ts->mem_offset;
2558
2559                    /* Loaded, but synced with memory.  */
2560                    arg_ts->state = TS_MEM;
2561                }
2562            }
2563        }
2564
2565        /* Perform input replacement, and mark inputs that became dead.
2566           No action is required except keeping temp_state up to date
2567           so that we reload when needed.  */
2568        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2569            arg_ts = arg_temp(op->args[i]);
2570            if (arg_ts) {
2571                dir_ts = arg_ts->state_ptr;
2572                if (dir_ts) {
2573                    op->args[i] = temp_arg(dir_ts);
2574                    changes = true;
2575                    if (IS_DEAD_ARG(i)) {
2576                        arg_ts->state = TS_DEAD;
2577                    }
2578                }
2579            }
2580        }
2581
2582        /* Liveness analysis should ensure that the following are
2583           all correct, for call sites and basic block end points.  */
2584        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2585            /* Nothing to do */
2586        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2587            for (i = 0; i < nb_globals; ++i) {
2588                /* Liveness should see that globals are synced back,
2589                   that is, either TS_DEAD or TS_MEM.  */
2590                arg_ts = &s->temps[i];
2591                tcg_debug_assert(arg_ts->state_ptr == 0
2592                                 || arg_ts->state != 0);
2593            }
2594        } else {
2595            for (i = 0; i < nb_globals; ++i) {
2596                /* Liveness should see that globals are saved back,
2597                   that is, TS_DEAD, waiting to be reloaded.  */
2598                arg_ts = &s->temps[i];
2599                tcg_debug_assert(arg_ts->state_ptr == 0
2600                                 || arg_ts->state == TS_DEAD);
2601            }
2602        }
2603
2604        /* Outputs become available.  */
2605        for (i = 0; i < nb_oargs; i++) {
2606            arg_ts = arg_temp(op->args[i]);
2607            dir_ts = arg_ts->state_ptr;
2608            if (!dir_ts) {
2609                continue;
2610            }
2611            op->args[i] = temp_arg(dir_ts);
2612            changes = true;
2613
2614            /* The output is now live and modified.  */
2615            arg_ts->state = 0;
2616
2617            /* Sync outputs upon their last write.  */
2618            if (NEED_SYNC_ARG(i)) {
2619                TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2620                                  ? INDEX_op_st_i32
2621                                  : INDEX_op_st_i64);
2622                TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
2623
2624                sop->args[0] = temp_arg(dir_ts);
2625                sop->args[1] = temp_arg(arg_ts->mem_base);
2626                sop->args[2] = arg_ts->mem_offset;
2627
2628                arg_ts->state = TS_MEM;
2629            }
2630            /* Drop outputs that are dead.  */
2631            if (IS_DEAD_ARG(i)) {
2632                arg_ts->state = TS_DEAD;
2633            }
2634        }
2635    }
2636
2637    return changes;
2638}
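
/*
 * Illustration with a hypothetical op stream: for an indirect global g
 * whose direct temporary is tmp_g, the pass above turns
 *
 *     add_i32 g, g, x
 * into
 *     ld_i32  tmp_g, <mem_base>, <mem_offset>    (only while g is TS_DEAD)
 *     add_i32 tmp_g, tmp_g, x
 *     st_i32  tmp_g, <mem_base>, <mem_offset>    (only on the last write)
 *
 * with arg_ts->state tracking whether the load or store may be elided.
 */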
2639
2640#ifdef CONFIG_DEBUG_TCG
2641static void dump_regs(TCGContext *s)
2642{
2643    TCGTemp *ts;
2644    int i;
2645    char buf[64];
2646
2647    for(i = 0; i < s->nb_temps; i++) {
2648        ts = &s->temps[i];
2649        printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2650        switch(ts->val_type) {
2651        case TEMP_VAL_REG:
2652            printf("%s", tcg_target_reg_names[ts->reg]);
2653            break;
2654        case TEMP_VAL_MEM:
2655            printf("%d(%s)", (int)ts->mem_offset,
2656                   tcg_target_reg_names[ts->mem_base->reg]);
2657            break;
2658        case TEMP_VAL_CONST:
2659            printf("$0x%" TCG_PRIlx, ts->val);
2660            break;
2661        case TEMP_VAL_DEAD:
2662            printf("D");
2663            break;
2664        default:
2665            printf("???");
2666            break;
2667        }
2668        printf("\n");
2669    }
2670
2671    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2672        if (s->reg_to_temp[i] != NULL) {
2673            printf("%s: %s\n", 
2674                   tcg_target_reg_names[i], 
2675                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2676        }
2677    }
2678}
2679
2680static void check_regs(TCGContext *s)
2681{
2682    int reg;
2683    int k;
2684    TCGTemp *ts;
2685    char buf[64];
2686
2687    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2688        ts = s->reg_to_temp[reg];
2689        if (ts != NULL) {
2690            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2691                printf("Inconsistency for register %s:\n", 
2692                       tcg_target_reg_names[reg]);
2693                goto fail;
2694            }
2695        }
2696    }
2697    for (k = 0; k < s->nb_temps; k++) {
2698        ts = &s->temps[k];
2699        if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
2700            && s->reg_to_temp[ts->reg] != ts) {
2701            printf("Inconsistency for temp %s:\n",
2702                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2703        fail:
2704            printf("reg state:\n");
2705            dump_regs(s);
2706            tcg_abort();
2707        }
2708    }
2709}
2710#endif
2711
2712static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
2713{
2714#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
2715    /* Sparc64 stack is accessed with offset of 2047 */
2716    s->current_frame_offset = (s->current_frame_offset +
2717                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
2718        ~(sizeof(tcg_target_long) - 1);
2719#endif
2720    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
2721        s->frame_end) {
2722        tcg_abort();
2723    }
2724    ts->mem_offset = s->current_frame_offset;
2725    ts->mem_base = s->frame_temp;
2726    ts->mem_allocated = 1;
2727    s->current_frame_offset += sizeof(tcg_target_long);
2728}
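
/*
 * Illustration, not part of the original source: the rounding applied to
 * current_frame_offset above, written out as a standalone helper; for a
 * power-of-two alignment, e.g. example_round_up(13, 8) == 16.
 */
static inline intptr_t example_round_up(intptr_t off, intptr_t align)
{
    return (off + align - 1) & ~(align - 1);
}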
2729
2730static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);
2731
2732/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
2733   mark it free; otherwise mark it dead.  */
2734static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
2735{
2736    if (ts->fixed_reg) {
2737        return;
2738    }
2739    if (ts->val_type == TEMP_VAL_REG) {
2740        s->reg_to_temp[ts->reg] = NULL;
2741    }
2742    ts->val_type = (free_or_dead < 0
2743                    || ts->temp_local
2744                    || ts->temp_global
2745                    ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
2746}
2747
2748/* Mark a temporary as dead.  */
2749static inline void temp_dead(TCGContext *s, TCGTemp *ts)
2750{
2751    temp_free_or_dead(s, ts, 1);
2752}
2753
2754/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
2755   register needs to be allocated to store a constant.  If 'free_or_dead'
2756   is non-zero, subsequently release the temporary; if it is positive, the
2757   temp is dead; if it is negative, the temp is free.  */
2758static void temp_sync(TCGContext *s, TCGTemp *ts,
2759                      TCGRegSet allocated_regs, int free_or_dead)
2760{
2761    if (ts->fixed_reg) {
2762        return;
2763    }
2764    if (!ts->mem_coherent) {
2765        if (!ts->mem_allocated) {
2766            temp_allocate_frame(s, ts);
2767        }
2768        switch (ts->val_type) {
2769        case TEMP_VAL_CONST:
2770            /* If we're going to free the temp immediately, then we won't
2771               require it later in a register, so attempt to store the
2772               constant to memory directly.  */
2773            if (free_or_dead
2774                && tcg_out_sti(s, ts->type, ts->val,
2775                               ts->mem_base->reg, ts->mem_offset)) {
2776                break;
2777            }
2778            temp_load(s, ts, tcg_target_available_regs[ts->type],
2779                      allocated_regs);
2780            /* fallthrough */
2781
2782        case TEMP_VAL_REG:
2783            tcg_out_st(s, ts->type, ts->reg,
2784                       ts->mem_base->reg, ts->mem_offset);
2785            break;
2786
2787        case TEMP_VAL_MEM:
2788            break;
2789
2790        case TEMP_VAL_DEAD:
2791        default:
2792            tcg_abort();
2793        }
2794        ts->mem_coherent = 1;
2795    }
2796    if (free_or_dead) {
2797        temp_free_or_dead(s, ts, free_or_dead);
2798    }
2799}
2800
2801/* free register 'reg' by spilling the corresponding temporary if necessary */
2802static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
2803{
2804    TCGTemp *ts = s->reg_to_temp[reg];
2805    if (ts != NULL) {
2806        temp_sync(s, ts, allocated_regs, -1);
2807    }
2808}
2809
2810/* Allocate a register from 'desired_regs' & ~'allocated_regs'.  */
2811static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
2812                            TCGRegSet allocated_regs, bool rev)
2813{
2814    int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
2815    const int *order;
2816    TCGReg reg;
2817    TCGRegSet reg_ct;
2818
2819    reg_ct = desired_regs & ~allocated_regs;
2820    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
2821
2822    /* first try free registers */
2823    for(i = 0; i < n; i++) {
2824        reg = order[i];
2825        if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
2826            return reg;
2827    }
2828
2829    /* XXX: do better spill choice */
2830    for(i = 0; i < n; i++) {
2831        reg = order[i];
2832        if (tcg_regset_test_reg(reg_ct, reg)) {
2833            tcg_reg_free(s, reg, allocated_regs);
2834            return reg;
2835        }
2836    }
2837
2838    tcg_abort();
2839}
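
/*
 * Worked example with hypothetical register names: given an allocation
 * order of {R0, R1, R2} and reg_ct = {R1, R2}, the first loop returns R2
 * when R1 is occupied but R2 is free; if both are occupied, the second
 * loop spills whichever temp currently lives in R1, the first allowed
 * register in order, via tcg_reg_free() and returns R1.
 */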
2840
2841/* Make sure the temporary is in a register.  If needed, allocate the register
2842   from DESIRED while avoiding ALLOCATED.  */
2843static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
2844                      TCGRegSet allocated_regs)
2845{
2846    TCGReg reg;
2847
2848    switch (ts->val_type) {
2849    case TEMP_VAL_REG:
2850        return;
2851    case TEMP_VAL_CONST:
2852        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2853        tcg_out_movi(s, ts->type, reg, ts->val);
2854        ts->mem_coherent = 0;
2855        break;
2856    case TEMP_VAL_MEM:
2857        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
2858        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
2859        ts->mem_coherent = 1;
2860        break;
2861    case TEMP_VAL_DEAD:
2862    default:
2863        tcg_abort();
2864    }
2865    ts->reg = reg;
2866    ts->val_type = TEMP_VAL_REG;
2867    s->reg_to_temp[reg] = ts;
2868}
2869
2870/* Save a temporary to memory. 'allocated_regs' is used in case a
2871   temporary register needs to be allocated to store a constant.  */
2872static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
2873{
2874    /* The liveness analysis already ensures that globals are back
2875       in memory. Keep a tcg_debug_assert for safety. */
2876    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
2877}
2878
2879/* save globals to their canonical location and assume they can be
2880   modified by the following code. 'allocated_regs' is used in case a
2881   temporary register needs to be allocated to store a constant. */
2882static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
2883{
2884    int i, n;
2885
2886    for (i = 0, n = s->nb_globals; i < n; i++) {
2887        temp_save(s, &s->temps[i], allocated_regs);
2888    }
2889}
2890
2891/* sync globals to their canonical location and assume they can be
2892   read by the following code. 'allocated_regs' is used in case a
2893   temporary register needs to be allocated to store a constant. */
2894static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
2895{
2896    int i, n;
2897
2898    for (i = 0, n = s->nb_globals; i < n; i++) {
2899        TCGTemp *ts = &s->temps[i];
2900        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
2901                         || ts->fixed_reg
2902                         || ts->mem_coherent);
2903    }
2904}
2905
2906/* at the end of a basic block, we assume all temporaries are dead and
2907   all globals are stored at their canonical location. */
2908static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
2909{
2910    int i;
2911
2912    for (i = s->nb_globals; i < s->nb_temps; i++) {
2913        TCGTemp *ts = &s->temps[i];
2914        if (ts->temp_local) {
2915            temp_save(s, ts, allocated_regs);
2916        } else {
2917            /* The liveness analysis already ensures that temps are dead.
2918               Keep a tcg_debug_assert for safety. */
2919            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
2920        }
2921    }
2922
2923    save_globals(s, allocated_regs);
2924}
2925
2926static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
2927                                  tcg_target_ulong val, TCGLifeData arg_life)
2928{
2929    if (ots->fixed_reg) {
2930        /* For fixed registers, we do not do any constant propagation.  */
2931        tcg_out_movi(s, ots->type, ots->reg, val);
2932        return;
2933    }
2934
2935    /* The movi is not explicitly generated here.  */
2936    if (ots->val_type == TEMP_VAL_REG) {
2937        s->reg_to_temp[ots->reg] = NULL;
2938    }
2939    ots->val_type = TEMP_VAL_CONST;
2940    ots->val = val;
2941    ots->mem_coherent = 0;
2942    if (NEED_SYNC_ARG(0)) {
2943        temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0));
2944    } else if (IS_DEAD_ARG(0)) {
2945        temp_dead(s, ots);
2946    }
2947}
2948
2949static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
2950{
2951    TCGTemp *ots = arg_temp(op->args[0]);
2952    tcg_target_ulong val = op->args[1];
2953
2954    tcg_reg_alloc_do_movi(s, ots, val, op->life);
2955}
2956
2957static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
2958{
2959    const TCGLifeData arg_life = op->life;
2960    TCGRegSet allocated_regs;
2961    TCGTemp *ts, *ots;
2962    TCGType otype, itype;
2963
2964    allocated_regs = s->reserved_regs;
2965    ots = arg_temp(op->args[0]);
2966    ts = arg_temp(op->args[1]);
2967
2968    /* Note that otype != itype for no-op truncation.  */
2969    otype = ots->type;
2970    itype = ts->type;
2971
2972    if (ts->val_type == TEMP_VAL_CONST) {
2973        /* propagate constant or generate sti */
2974        tcg_target_ulong val = ts->val;
2975        if (IS_DEAD_ARG(1)) {
2976            temp_dead(s, ts);
2977        }
2978        tcg_reg_alloc_do_movi(s, ots, val, arg_life);
2979        return;
2980    }
2981
2982    /* If the source value is in memory we're going to be forced
2983       to have it in a register in order to perform the copy.  Copy
2984       the SOURCE value into its own register first, that way we
2985       don't have to reload SOURCE the next time it is used. */
2986    if (ts->val_type == TEMP_VAL_MEM) {
2987        temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
2988    }
2989
2990    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
2991    if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
2992        /* mov to a non-saved dead register makes no sense (even with
2993           liveness analysis disabled). */
2994        tcg_debug_assert(NEED_SYNC_ARG(0));
2995        if (!ots->mem_allocated) {
2996            temp_allocate_frame(s, ots);
2997        }
2998        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
2999        if (IS_DEAD_ARG(1)) {
3000            temp_dead(s, ts);
3001        }
3002        temp_dead(s, ots);
3003    } else {
3004        if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
3005            /* the mov can be suppressed */
3006            if (ots->val_type == TEMP_VAL_REG) {
3007                s->reg_to_temp[ots->reg] = NULL;
3008            }
3009            ots->reg = ts->reg;
3010            temp_dead(s, ts);
3011        } else {
3012            if (ots->val_type != TEMP_VAL_REG) {
3013                /* When allocating a new register, make sure to not spill the
3014                   input one. */
3015                tcg_regset_set_reg(allocated_regs, ts->reg);
3016                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3017                                         allocated_regs, ots->indirect_base);
3018            }
3019            tcg_out_mov(s, otype, ots->reg, ts->reg);
3020        }
3021        ots->val_type = TEMP_VAL_REG;
3022        ots->mem_coherent = 0;
3023        s->reg_to_temp[ots->reg] = ots;
3024        if (NEED_SYNC_ARG(0)) {
3025            temp_sync(s, ots, allocated_regs, 0);
3026        }
3027    }
3028}
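
/*
 * Illustration: in the common case where the source temp dies at the mov
 * and neither operand is a fixed register, the branch above emits no host
 * instruction at all; the output temp simply inherits the source's
 * register, so "mov_i32 tmp3, tmp1" with tmp1 dead costs zero host code.
 */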
3029
3030static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3031{
3032    const TCGLifeData arg_life = op->life;
3033    const TCGOpDef * const def = &tcg_op_defs[op->opc];
3034    TCGRegSet i_allocated_regs;
3035    TCGRegSet o_allocated_regs;
3036    int i, k, nb_iargs, nb_oargs;
3037    TCGReg reg;
3038    TCGArg arg;
3039    const TCGArgConstraint *arg_ct;
3040    TCGTemp *ts;
3041    TCGArg new_args[TCG_MAX_OP_ARGS];
3042    int const_args[TCG_MAX_OP_ARGS];
3043
3044    nb_oargs = def->nb_oargs;
3045    nb_iargs = def->nb_iargs;
3046
3047    /* copy constants */
3048    memcpy(new_args + nb_oargs + nb_iargs, 
3049           op->args + nb_oargs + nb_iargs,
3050           sizeof(TCGArg) * def->nb_cargs);
3051
3052    i_allocated_regs = s->reserved_regs;
3053    o_allocated_regs = s->reserved_regs;
3054
3055    /* satisfy input constraints */ 
3056    for (k = 0; k < nb_iargs; k++) {
3057        i = def->sorted_args[nb_oargs + k];
3058        arg = op->args[i];
3059        arg_ct = &def->args_ct[i];
3060        ts = arg_temp(arg);
3061
3062        if (ts->val_type == TEMP_VAL_CONST
3063            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3064            /* constant is OK for instruction */
3065            const_args[i] = 1;
3066            new_args[i] = ts->val;
3067            goto iarg_end;
3068        }
3069
3070        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);
3071
3072        if (arg_ct->ct & TCG_CT_IALIAS) {
3073            if (ts->fixed_reg) {
3074                /* if fixed register, we must allocate a new register
3075                   if the alias is not the same register */
3076                if (arg != op->args[arg_ct->alias_index])
3077                    goto allocate_in_reg;
3078            } else {
3079                /* if the input is aliased to an output and if it is
3080                   not dead after the instruction, we must allocate
3081                   a new register and move it */
3082                if (!IS_DEAD_ARG(i)) {
3083                    goto allocate_in_reg;
3084                }
3085                /* check if the current register has already been allocated
3086                   for another input aliased to an output */
3087                int k2, i2;
3088                for (k2 = 0 ; k2 < k ; k2++) {
3089                    i2 = def->sorted_args[nb_oargs + k2];
3090                    if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3091                        (new_args[i2] == ts->reg)) {
3092                        goto allocate_in_reg;
3093                    }
3094                }
3095            }
3096        }
3097        reg = ts->reg;
3098        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3099            /* nothing to do: the constraint is satisfied */
3100        } else {
3101        allocate_in_reg:
3102            /* allocate a new register matching the constraint 
3103               and move the temporary register into it */
3104            reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3105                                ts->indirect_base);
3106            tcg_out_mov(s, ts->type, reg, ts->reg);
3107        }
3108        new_args[i] = reg;
3109        const_args[i] = 0;
3110        tcg_regset_set_reg(i_allocated_regs, reg);
3111    iarg_end: ;
3112    }
3113    
3114    /* mark dead temporaries and free the associated registers */
3115    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3116        if (IS_DEAD_ARG(i)) {
3117            temp_dead(s, arg_temp(op->args[i]));
3118        }
3119    }
3120
3121    if (def->flags & TCG_OPF_BB_END) {
3122        tcg_reg_alloc_bb_end(s, i_allocated_regs);
3123    } else {
3124        if (def->flags & TCG_OPF_CALL_CLOBBER) {
3125            /* XXX: permit generic clobber register list? */
3126            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3127                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3128                    tcg_reg_free(s, i, i_allocated_regs);
3129                }
3130            }
3131        }
3132        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3133            /* sync globals if the op has side effects and might trigger
3134               an exception. */
3135            sync_globals(s, i_allocated_regs);
3136        }
3137        
3138        /* satisfy the output constraints */
3139        for(k = 0; k < nb_oargs; k++) {
3140            i = def->sorted_args[k];
3141            arg = op->args[i];
3142            arg_ct = &def->args_ct[i];
3143            ts = arg_temp(arg);
3144            if ((arg_ct->ct & TCG_CT_ALIAS)
3145                && !const_args[arg_ct->alias_index]) {
3146                reg = new_args[arg_ct->alias_index];
3147            } else if (arg_ct->ct & TCG_CT_NEWREG) {
3148                reg = tcg_reg_alloc(s, arg_ct->u.regs,
3149                                    i_allocated_regs | o_allocated_regs,
3150                                    ts->indirect_base);
3151            } else {
3152                /* if fixed register, we try to use it */
3153                reg = ts->reg;
3154                if (ts->fixed_reg &&
3155                    tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3156                    goto oarg_end;
3157                }
3158                reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3159                                    ts->indirect_base);
3160            }
3161            tcg_regset_set_reg(o_allocated_regs, reg);
3162            /* if a fixed register is used, then a move will be done afterwards */
3163            if (!ts->fixed_reg) {
3164                if (ts->val_type == TEMP_VAL_REG) {
3165                    s->reg_to_temp[ts->reg] = NULL;
3166                }
3167                ts->val_type = TEMP_VAL_REG;
3168                ts->reg = reg;
3169                /* temp value is modified, so the value kept in memory is
3170                   potentially not the same */
3171                ts->mem_coherent = 0;
3172                s->reg_to_temp[reg] = ts;
3173            }
3174        oarg_end:
3175            new_args[i] = reg;
3176        }
3177    }
3178
3179    /* emit instruction */
3180    if (def->flags & TCG_OPF_VECTOR) {
3181        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3182                       new_args, const_args);
3183    } else {
3184        tcg_out_op(s, op->opc, new_args, const_args);
3185    }
3186
3187    /* move the outputs in the correct register if needed */
3188    for(i = 0; i < nb_oargs; i++) {
3189        ts = arg_temp(op->args[i]);
3190        reg = new_args[i];
3191        if (ts->fixed_reg && ts->reg != reg) {
3192            tcg_out_mov(s, ts->type, ts->reg, reg);
3193        }
3194        if (NEED_SYNC_ARG(i)) {
3195            temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
3196        } else if (IS_DEAD_ARG(i)) {
3197            temp_dead(s, ts);
3198        }
3199    }
3200}
3201
3202#ifdef TCG_TARGET_STACK_GROWSUP
3203#define STACK_DIR(x) (-(x))
3204#else
3205#define STACK_DIR(x) (x)
3206#endif
3207
3208static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3209{
3210    const int nb_oargs = TCGOP_CALLO(op);
3211    const int nb_iargs = TCGOP_CALLI(op);
3212    const TCGLifeData arg_life = op->life;
3213    int flags, nb_regs, i;
3214    TCGReg reg;
3215    TCGArg arg;
3216    TCGTemp *ts;
3217    intptr_t stack_offset;
3218    size_t call_stack_size;
3219    tcg_insn_unit *func_addr;
3220    int allocate_args;
3221    TCGRegSet allocated_regs;
3222
3223    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3224    flags = op->args[nb_oargs + nb_iargs + 1];
3225
3226    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3227    if (nb_regs > nb_iargs) {
3228        nb_regs = nb_iargs;
3229    }
3230
3231    /* assign stack slots first */
3232    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3233    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 
3234        ~(TCG_TARGET_STACK_ALIGN - 1);
3235    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3236    if (allocate_args) {
3237        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3238           preallocate call stack */
3239        tcg_abort();
3240    }
3241
3242    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3243    for (i = nb_regs; i < nb_iargs; i++) {
3244        arg = op->args[nb_oargs + i];
3245#ifdef TCG_TARGET_STACK_GROWSUP
3246        stack_offset -= sizeof(tcg_target_long);
3247#endif
3248        if (arg != TCG_CALL_DUMMY_ARG) {
3249            ts = arg_temp(arg);
3250            temp_load(s, ts, tcg_target_available_regs[ts->type],
3251                      s->reserved_regs);
3252            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3253        }
3254#ifndef TCG_TARGET_STACK_GROWSUP
3255        stack_offset += sizeof(tcg_target_long);
3256#endif
3257    }
3258
3259    /* assign input registers */
3260    allocated_regs = s->reserved_regs;
3261    for (i = 0; i < nb_regs; i++) {
3262        arg = op->args[nb_oargs + i];
3263        if (arg != TCG_CALL_DUMMY_ARG) {
3264            ts = arg_temp(arg);
3265            reg = tcg_target_call_iarg_regs[i];
3266            tcg_reg_free(s, reg, allocated_regs);
3267
3268            if (ts->val_type == TEMP_VAL_REG) {
3269                if (ts->reg != reg) {
3270                    tcg_out_mov(s, ts->type, reg, ts->reg);
3271                }
3272            } else {
3273                TCGRegSet arg_set = 0;
3274
3275                tcg_regset_set_reg(arg_set, reg);
3276                temp_load(s, ts, arg_set, allocated_regs);
3277            }
3278
3279            tcg_regset_set_reg(allocated_regs, reg);
3280        }
3281    }
3282
3283    /* mark dead temporaries and free the associated registers */
3284    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3285        if (IS_DEAD_ARG(i)) {
3286            temp_dead(s, arg_temp(op->args[i]));
3287        }
3288    }
3289
3290    /* clobber call registers */
3291    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3292        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3293            tcg_reg_free(s, i, allocated_regs);
3294        }
3295    }
3296
3297    /* Save globals if the helper might write them; sync them if it might
3298       read them.  NO_READ_GLOBALS implies NO_WRITE_GLOBALS.  */
3299    if (flags & TCG_CALL_NO_READ_GLOBALS) {
3300        /* Nothing to do */
3301    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3302        sync_globals(s, allocated_regs);
3303    } else {
3304        save_globals(s, allocated_regs);
3305    }
3306
3307    tcg_out_call(s, func_addr);
3308
3309    /* assign output registers and emit moves if needed */
3310    for (i = 0; i < nb_oargs; i++) {
3311        arg = op->args[i];
3312        ts = arg_temp(arg);
3313        reg = tcg_target_call_oarg_regs[i];
3314        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3315
3316        if (ts->fixed_reg) {
3317            if (ts->reg != reg) {
3318                tcg_out_mov(s, ts->type, ts->reg, reg);
3319            }
3320        } else {
3321            if (ts->val_type == TEMP_VAL_REG) {
3322                s->reg_to_temp[ts->reg] = NULL;
3323            }
3324            ts->val_type = TEMP_VAL_REG;
3325            ts->reg = reg;
3326            ts->mem_coherent = 0;
3327            s->reg_to_temp[reg] = ts;
3328            if (NEED_SYNC_ARG(i)) {
3329                temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
3330            } else if (IS_DEAD_ARG(i)) {
3331                temp_dead(s, ts);
3332            }
3333        }
3334    }
3335}
3336
3337#ifdef CONFIG_PROFILER
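/* CONFIG_PROFILER is enabled by configuring QEMU with --enable-profiler.  */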
3338
3339/* avoid copy/paste errors */
3340#define PROF_ADD(to, from, field)                       \
3341    do {                                                \
3342        (to)->field += atomic_read(&((from)->field));   \
3343    } while (0)
3344
3345#define PROF_MAX(to, from, field)                                       \
3346    do {                                                                \
3347        typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3348        if (val__ > (to)->field) {                                      \
3349            (to)->field = val__;                                        \
3350        }                                                               \
3351    } while (0)
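/* For instance, PROF_ADD(prof, orig, tb_count) expands (modulo
   parentheses) to
       prof->tb_count += atomic_read(&orig->tb_count);
   while PROF_MAX keeps the maximum of the two values instead.  */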
3352
3353/* Pass in a zeroed @prof.  */
3354static inline
3355void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3356{
3357    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3358    unsigned int i;
3359
3360    for (i = 0; i < n_ctxs; i++) {
3361        TCGContext *s = atomic_read(&tcg_ctxs[i]);
3362        const TCGProfile *orig = &s->prof;
3363
3364        if (counters) {
3365            PROF_ADD(prof, orig, cpu_exec_time);
3366            PROF_ADD(prof, orig, tb_count1);
3367            PROF_ADD(prof, orig, tb_count);
3368            PROF_ADD(prof, orig, op_count);
3369            PROF_MAX(prof, orig, op_count_max);
3370            PROF_ADD(prof, orig, temp_count);
3371            PROF_MAX(prof, orig, temp_count_max);
3372            PROF_ADD(prof, orig, del_op_count);
3373            PROF_ADD(prof, orig, code_in_len);
3374            PROF_ADD(prof, orig, code_out_len);
3375            PROF_ADD(prof, orig, search_out_len);
3376            PROF_ADD(prof, orig, interm_time);
3377            PROF_ADD(prof, orig, code_time);
3378            PROF_ADD(prof, orig, la_time);
3379            PROF_ADD(prof, orig, opt_time);
3380            PROF_ADD(prof, orig, restore_count);
3381            PROF_ADD(prof, orig, restore_time);
3382        }
3383        if (table) {
3384            int j;  /* avoid shadowing the outer context-loop counter */
3385
3386            for (j = 0; j < NB_OPS; j++) {
3387                PROF_ADD(prof, orig, table_op_count[j]);
3388            }
3389        }
3390    }
3391}
3392
3393#undef PROF_ADD
3394#undef PROF_MAX
3395
3396static void tcg_profile_snapshot_counters(TCGProfile *prof)
3397{
3398    tcg_profile_snapshot(prof, true, false);
3399}
3400
3401static void tcg_profile_snapshot_table(TCGProfile *prof)
3402{
3403    tcg_profile_snapshot(prof, false, true);
3404}
3405
3406void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3407{
3408    TCGProfile prof = {};
3409    int i;
3410
3411    tcg_profile_snapshot_table(&prof);
3412    for (i = 0; i < NB_OPS; i++) {
3413        cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
3414                    prof.table_op_count[i]);
3415    }
3416}
3417
3418int64_t tcg_cpu_exec_time(void)
3419{
3420    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3421    unsigned int i;
3422    int64_t ret = 0;
3423
3424    for (i = 0; i < n_ctxs; i++) {
3425        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
3426        const TCGProfile *prof = &s->prof;
3427
3428        ret += atomic_read(&prof->cpu_exec_time);
3429    }
3430    return ret;
3431}
3432#else
3433void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3434{
3435    cpu_fprintf(f, "[TCG profiler not compiled]\n");
3436}
3437
3438int64_t tcg_cpu_exec_time(void)
3439{
3440    error_report("%s: TCG profiler not compiled", __func__);
3441    exit(EXIT_FAILURE);
3442}
3443#endif
3444
3445
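/* Translate the ops in s->ops into host code for @tb.  Returns the number
   of bytes of host code generated, or a negative value if the code buffer
   overflowed, in which case the caller is expected to flush the buffer
   and retry the translation.  */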
3446int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
3447{
3448#ifdef CONFIG_PROFILER
3449    TCGProfile *prof = &s->prof;
3450#endif
3451    int i, num_insns;
3452    TCGOp *op;
3453
3454#ifdef CONFIG_PROFILER
3455    {
3456        int n = 0;
3457
3458        QTAILQ_FOREACH(op, &s->ops, link) {
3459            n++;
3460        }
3461        atomic_set(&prof->op_count, prof->op_count + n);
3462        if (n > prof->op_count_max) {
3463            atomic_set(&prof->op_count_max, n);
3464        }
3465
3466        n = s->nb_temps;
3467        atomic_set(&prof->temp_count, prof->temp_count + n);
3468        if (n > prof->temp_count_max) {
3469            atomic_set(&prof->temp_count_max, n);
3470        }
3471    }
3472#endif
3473
3474#ifdef DEBUG_DISAS
3475    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
3476                 && qemu_log_in_addr_range(tb->pc))) {
3477        qemu_log_lock();
3478        qemu_log("OP:\n");
3479        tcg_dump_ops(s);
3480        qemu_log("\n");
3481        qemu_log_unlock();
3482    }
3483#endif
3484
3485#ifdef CONFIG_PROFILER
3486    atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
3487#endif
3488
3489#ifdef USE_TCG_OPTIMIZATIONS
3490    tcg_optimize(s);
3491#endif
3492
3493#ifdef CONFIG_PROFILER
3494    atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
3495    atomic_set(&prof->la_time, prof->la_time - profile_getclock());
3496#endif
3497
3498    liveness_pass_1(s);
3499
3500    if (s->nb_indirects > 0) {
3501#ifdef DEBUG_DISAS
3502        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
3503                     && qemu_log_in_addr_range(tb->pc))) {
3504            qemu_log_lock();
3505            qemu_log("OP before indirect lowering:\n");
3506            tcg_dump_ops(s);
3507            qemu_log("\n");
3508            qemu_log_unlock();
3509        }
3510#endif
3511        /* Replace indirect temps with direct temps.  */
3512        if (liveness_pass_2(s)) {
3513            /* If changes were made, re-run liveness.  */
3514            liveness_pass_1(s);
3515        }
3516    }
3517
3518#ifdef CONFIG_PROFILER
3519    atomic_set(&prof->la_time, prof->la_time + profile_getclock());
3520#endif
3521
3522#ifdef DEBUG_DISAS
3523    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
3524                 && qemu_log_in_addr_range(tb->pc))) {
3525        qemu_log_lock();
3526        qemu_log("OP after optimization and liveness analysis:\n");
3527        tcg_dump_ops(s);
3528        qemu_log("\n");
3529        qemu_log_unlock();
3530    }
3531#endif
3532
3533    tcg_reg_alloc_start(s);
3534
3535    s->code_buf = tb->tc.ptr;
3536    s->code_ptr = tb->tc.ptr;
3537
3538#ifdef TCG_TARGET_NEED_LDST_LABELS
3539    QSIMPLEQ_INIT(&s->ldst_labels);
3540#endif
3541#ifdef TCG_TARGET_NEED_POOL_LABELS
3542    s->pool_labels = NULL;
3543#endif
3544
3545    num_insns = -1;
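    /* num_insns stays -1 until the first insn_start op; thereafter each
       insn_start records the host-code end offset of the previous guest
       instruction in gen_insn_end_off[].  */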
3546    QTAILQ_FOREACH(op, &s->ops, link) {
3547        TCGOpcode opc = op->opc;
3548
3549#ifdef CONFIG_PROFILER
3550        atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
3551#endif
3552
3553        switch (opc) {
3554        case INDEX_op_mov_i32:
3555        case INDEX_op_mov_i64:
3556        case INDEX_op_mov_vec:
3557            tcg_reg_alloc_mov(s, op);
3558            break;
3559        case INDEX_op_movi_i32:
3560        case INDEX_op_movi_i64:
3561        case INDEX_op_dupi_vec:
3562            tcg_reg_alloc_movi(s, op);
3563            break;
3564        case INDEX_op_insn_start:
3565            if (num_insns >= 0) {
3566                size_t off = tcg_current_code_size(s);
3567                s->gen_insn_end_off[num_insns] = off;
3568                /* Assert that we do not overflow our stored offset.  */
3569                assert(s->gen_insn_end_off[num_insns] == off);
3570            }
3571            num_insns++;
3572            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
3573                target_ulong a;
3574#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
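                /* The target_ulong was split across two 32-bit args.  */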
3575                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
3576#else
3577                a = op->args[i];
3578#endif
3579                s->gen_insn_data[num_insns][i] = a;
3580            }
3581            break;
3582        case INDEX_op_discard:
3583            temp_dead(s, arg_temp(op->args[0]));
3584            break;
3585        case INDEX_op_set_label:
3586            tcg_reg_alloc_bb_end(s, s->reserved_regs);
3587            tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
3588            break;
3589        case INDEX_op_call:
3590            tcg_reg_alloc_call(s, op);
3591            break;
3592        default:
3593            /* Sanity check that we've not introduced any unhandled opcodes. */
3594            tcg_debug_assert(tcg_op_supported(opc));
3595            /* Note: it would be faster to have specialized
3596               register allocator functions for some common
3597               argument patterns.  */
3598            tcg_reg_alloc_op(s, op);
3599            break;
3600        }
3601#ifdef CONFIG_DEBUG_TCG
3602        check_regs(s);
3603#endif
3604        /* Test for (pending) buffer overflow.  The assumption is that any
3605           one operation beginning below the high water mark cannot overrun
3606           the buffer completely.  Thus we can test for overflow after
3607           generating code without having to check during generation.  */
3608        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
3609            return -1;
3610        }
3611    }
3612    tcg_debug_assert(num_insns >= 0);
3613    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
3614
3615    /* Generate TB finalization at the end of the block.  */
3616#ifdef TCG_TARGET_NEED_LDST_LABELS
3617    if (!tcg_out_ldst_finalize(s)) {
3618        return -1;
3619    }
3620#endif
3621#ifdef TCG_TARGET_NEED_POOL_LABELS
3622    if (!tcg_out_pool_finalize(s)) {
3623        return -1;
3624    }
3625#endif
3626
3627    /* flush instruction cache */
3628    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
3629
3630    return tcg_current_code_size(s);
3631}
3632
3633#ifdef CONFIG_PROFILER
3634void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
3635{
3636    TCGProfile prof = {};
3637    const TCGProfile *s;
3638    int64_t tb_count;
3639    int64_t tb_div_count;
3640    int64_t tot;
3641
3642    tcg_profile_snapshot_counters(&prof);
3643    s = &prof;
3644    tb_count = s->tb_count;
3645    tb_div_count = tb_count ? tb_count : 1;
3646    tot = s->interm_time + s->code_time;
3647
3648    cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
3649                tot, tot / 2.4e9);
3650    cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
3651                tb_count, s->tb_count1 - tb_count,
3652                (double)(s->tb_count1 - tb_count)
3653                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
3654    cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
3655                (double)s->op_count / tb_div_count, s->op_count_max);
3656    cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
3657                (double)s->del_op_count / tb_div_count);
3658    cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
3659                (double)s->temp_count / tb_div_count, s->temp_count_max);
3660    cpu_fprintf(f, "avg host code/TB    %0.1f\n",
3661                (double)s->code_out_len / tb_div_count);
3662    cpu_fprintf(f, "avg search data/TB  %0.1f\n",
3663                (double)s->search_out_len / tb_div_count);
3664
3665    cpu_fprintf(f, "cycles/op           %0.1f\n",
3666                s->op_count ? (double)tot / s->op_count : 0);
3667    cpu_fprintf(f, "cycles/in byte      %0.1f\n",
3668                s->code_in_len ? (double)tot / s->code_in_len : 0);
3669    cpu_fprintf(f, "cycles/out byte     %0.1f\n",
3670                s->code_out_len ? (double)tot / s->code_out_len : 0);
3671    cpu_fprintf(f, "cycles/search byte  %0.1f\n",
3672                s->search_out_len ? (double)tot / s->search_out_len : 0);
3673    if (tot == 0) {
3674        tot = 1;
3675    }
3676    cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
3677                (double)s->interm_time / tot * 100.0);
3678    cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
3679                (double)s->code_time / tot * 100.0);
3680    cpu_fprintf(f, "optim./code time    %0.1f%%\n",
3681                (double)s->opt_time / (s->code_time ? s->code_time : 1)
3682                * 100.0);
3683    cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
3684                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
3685    cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
3686                s->restore_count);
3687    cpu_fprintf(f, "  avg cycles        %0.1f\n",
3688                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
3689}
3690#else
3691void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
3692{
3693    cpu_fprintf(f, "[TCG profiler not compiled]\n");
3694}
3695#endif
3696
3697#ifdef ELF_HOST_MACHINE
3698/* In order to use this feature, the backend needs to do three things:
3699
3700   (1) Define ELF_HOST_MACHINE to indicate both what value to
3701       put into the ELF image and to indicate support for the feature.
3702
3703   (2) Define tcg_register_jit.  This should create a buffer containing
3704       the contents of a .debug_frame section that describes the post-
3705       prologue unwind info for the tcg machine.
3706
3707   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
3708*/
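
/* As an illustration only (never compiled): a backend's tcg_register_jit
   would look roughly like the sketch below.  The CIE/FDE field values
   here are placeholders, not valid unwind data for any real host, and a
   real backend also appends CFA instructions after the FDE header.  */
#if 0
static const DebugFrameHeader debug_frame = {
    .cie.len = sizeof(DebugFrameCIE) - 4, /* length after the .len field */
    .cie.id = -1,                         /* distinguishes a CIE from an FDE */
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x78,               /* sleb128 -8; host dependent */
    .cie.return_column = 16,              /* DWARF regno of the PC; host dependent */
    .fde.len = sizeof(DebugFrameFDEHeader) - 4,
    /* fde.func_start and fde.func_len are patched by tcg_register_jit_int.  */
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif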
3709
3710/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
3711typedef enum {
3712    JIT_NOACTION = 0,
3713    JIT_REGISTER_FN,
3714    JIT_UNREGISTER_FN
3715} jit_actions_t;
3716
3717struct jit_code_entry {
3718    struct jit_code_entry *next_entry;
3719    struct jit_code_entry *prev_entry;
3720    const void *symfile_addr;
3721    uint64_t symfile_size;
3722};
3723
3724struct jit_descriptor {
3725    uint32_t version;
3726    uint32_t action_flag;
3727    struct jit_code_entry *relevant_entry;
3728    struct jit_code_entry *first_entry;
3729};
3730
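/* GDB sets a breakpoint in this function; the noinline attribute plus the
   empty asm statement keep calls to it from being optimized away.  */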
3731void __jit_debug_register_code(void) __attribute__((noinline));
3732void __jit_debug_register_code(void)
3733{
3734    asm("");
3735}
3736
3737/* Must statically initialize the version, because GDB may check
3738   the version before we can set it.  */
3739struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
3740
3741/* End GDB interface.  */
3742
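/* Return the offset of @str within @strtab.  The string must be present;
   there is no failure check.  E.g. for the string table constructed below,
   find_string(img->str, ".text") returns 1.  */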
3743static int find_string(const char *strtab, const char *str)
3744{
3745    const char *p = strtab + 1;
3746
3747    while (1) {
3748        if (strcmp(p, str) == 0) {
3749            return p - strtab;
3750        }
3751        p += strlen(p) + 1;
3752    }
3753}
3754
3755static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
3756                                 const void *debug_frame,
3757                                 size_t debug_frame_size)
3758{
3759    struct __attribute__((packed)) DebugInfo {
3760        uint32_t  len;
3761        uint16_t  version;
3762        uint32_t  abbrev;
3763        uint8_t   ptr_size;
3764        uint8_t   cu_die;
3765        uint16_t  cu_lang;
3766        uintptr_t cu_low_pc;
3767        uintptr_t cu_high_pc;
3768        uint8_t   fn_die;
3769        char      fn_name[16];
3770        uintptr_t fn_low_pc;
3771        uintptr_t fn_high_pc;
3772        uint8_t   cu_eoc;
3773    };
3774
3775    struct ElfImage {
3776        ElfW(Ehdr) ehdr;
3777        ElfW(Phdr) phdr;
3778        ElfW(Shdr) shdr[7];
3779        ElfW(Sym)  sym[2];
3780        struct DebugInfo di;
3781        uint8_t    da[24];
3782        char       str[80];
3783    };
3784
3785    struct ElfImage *img;
3786
3787    static const struct ElfImage img_template = {
3788        .ehdr = {
3789            .e_ident[EI_MAG0] = ELFMAG0,
3790            .e_ident[EI_MAG1] = ELFMAG1,
3791            .e_ident[EI_MAG2] = ELFMAG2,
3792            .e_ident[EI_MAG3] = ELFMAG3,
3793            .e_ident[EI_CLASS] = ELF_CLASS,
3794            .e_ident[EI_DATA] = ELF_DATA,
3795            .e_ident[EI_VERSION] = EV_CURRENT,
3796            .e_type = ET_EXEC,
3797            .e_machine = ELF_HOST_MACHINE,
3798            .e_version = EV_CURRENT,
3799            .e_phoff = offsetof(struct ElfImage, phdr),
3800            .e_shoff = offsetof(struct ElfImage, shdr),
3801            .e_ehsize = sizeof(ElfW(Ehdr)),
3802            .e_phentsize = sizeof(ElfW(Phdr)),
3803            .e_phnum = 1,
3804            .e_shentsize = sizeof(ElfW(Shdr)),
3805            .e_shnum = ARRAY_SIZE(img->shdr),
3806            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
3807#ifdef ELF_HOST_FLAGS
3808            .e_flags = ELF_HOST_FLAGS,
3809#endif
3810#ifdef ELF_OSABI
3811            .e_ident[EI_OSABI] = ELF_OSABI,
3812#endif
3813        },
3814        .phdr = {
3815            .p_type = PT_LOAD,
3816            .p_flags = PF_X,
3817        },
3818        .shdr = {
3819            [0] = { .sh_type = SHT_NULL },
3820            /* Trick: The contents of code_gen_buffer are not present in
3821               this fake ELF file; that got allocated elsewhere.  Therefore
3822               we mark .text as SHT_NOBITS (similar to .bss) so that readers
3823               will not look for contents.  We can record any address.  */
3824            [1] = { /* .text */
3825                .sh_type = SHT_NOBITS,
3826                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
3827            },
3828            [2] = { /* .debug_info */
3829                .sh_type = SHT_PROGBITS,
3830                .sh_offset = offsetof(struct ElfImage, di),
3831                .sh_size = sizeof(struct DebugInfo),
3832            },
3833            [3] = { /* .debug_abbrev */
3834                .sh_type = SHT_PROGBITS,
3835                .sh_offset = offsetof(struct ElfImage, da),
3836                .sh_size = sizeof(img->da),
3837            },
3838            [4] = { /* .debug_frame */
3839                .sh_type = SHT_PROGBITS,
3840                .sh_offset = sizeof(struct ElfImage),
3841            },
3842            [5] = { /* .symtab */
3843                .sh_type = SHT_SYMTAB,
3844                .sh_offset = offsetof(struct ElfImage, sym),
3845                .sh_size = sizeof(img->sym),
3846                .sh_info = 1,
3847                .sh_link = ARRAY_SIZE(img->shdr) - 1,
3848                .sh_entsize = sizeof(ElfW(Sym)),
3849            },
3850            [6] = { /* .strtab */
3851                .sh_type = SHT_STRTAB,
3852                .sh_offset = offsetof(struct ElfImage, str),
3853                .sh_size = sizeof(img->str),
3854            }
3855        },
3856        .sym = {
3857            [1] = { /* code_gen_buffer */
3858                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
3859                .st_shndx = 1,
3860            }
3861        },
3862        .di = {
3863            .len = sizeof(struct DebugInfo) - 4,
3864            .version = 2,
3865            .ptr_size = sizeof(void *),
3866            .cu_die = 1,
3867            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
3868            .fn_die = 2,
3869            .fn_name = "code_gen_buffer"
3870        },
3871        .da = {
3872            1,          /* abbrev number (the cu) */
3873            0x11, 1,    /* DW_TAG_compile_unit, has children */
3874            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
3875            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
3876            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
3877            0, 0,       /* end of abbrev */
3878            2,          /* abbrev number (the fn) */
3879            0x2e, 0,    /* DW_TAG_subprogram, no children */
3880            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
3881            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
3882            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
3883            0, 0,       /* end of abbrev */
3884            0           /* no more abbrev */
3885        },
3886        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
3887               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
3888    };
3889
3890    /* We only need a single jit entry; statically allocate it.  */
3891    static struct jit_code_entry one_entry;
3892
3893    uintptr_t buf = (uintptr_t)buf_ptr;
3894    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
3895    DebugFrameHeader *dfh;
3896
3897    img = g_malloc(img_size);
3898    *img = img_template;
3899
3900    img->phdr.p_vaddr = buf;
3901    img->phdr.p_paddr = buf;
3902    img->phdr.p_memsz = buf_size;
3903
3904    img->shdr[1].sh_name = find_string(img->str, ".text");
3905    img->shdr[1].sh_addr = buf;
3906    img->shdr[1].sh_size = buf_size;
3907
3908    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
3909    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
3910
3911    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
3912    img->shdr[4].sh_size = debug_frame_size;
3913
3914    img->shdr[5].sh_name = find_string(img->str, ".symtab");
3915    img->shdr[6].sh_name = find_string(img->str, ".strtab");
3916
3917    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
3918    img->sym[1].st_value = buf;
3919    img->sym[1].st_size = buf_size;
3920
3921    img->di.cu_low_pc = buf;
3922    img->di.cu_high_pc = buf + buf_size;
3923    img->di.fn_low_pc = buf;
3924    img->di.fn_high_pc = buf + buf_size;
3925
3926    dfh = (DebugFrameHeader *)(img + 1);
3927    memcpy(dfh, debug_frame, debug_frame_size);
3928    dfh->fde.func_start = buf;
3929    dfh->fde.func_len = buf_size;
3930
3931#ifdef DEBUG_JIT
3932    /* Enable this block to be able to debug the ELF image file creation.
3933       One can use readelf, objdump, or other inspection utilities.  */
3934    {
3935        FILE *f = fopen("/tmp/qemu.jit", "w+b");
3936        if (f) {
3937            if (fwrite(img, img_size, 1, f) != 1) {
3938                /* Avoid the warn_unused_result warning for fwrite.  */
3939            }
3940            fclose(f);
3941        }
3942    }
3943#endif
3944
3945    one_entry.symfile_addr = img;
3946    one_entry.symfile_size = img_size;
3947
3948    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
3949    __jit_debug_descriptor.relevant_entry = &one_entry;
3950    __jit_debug_descriptor.first_entry = &one_entry;
3951    __jit_debug_register_code();
3952}
3953#else
3954/* No support for the feature.  Provide the entry point expected by exec.c,
3955   and implement the internal function we declared earlier.  */
3956
3957static void tcg_register_jit_int(void *buf, size_t size,
3958                                 const void *debug_frame,
3959                                 size_t debug_frame_size)
3960{
3961}
3962
3963void tcg_register_jit(void *buf, size_t buf_size)
3964{
3965}
3966#endif /* ELF_HOST_MACHINE */
3967
3968#if !TCG_TARGET_MAYBE_vec
3969void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
3970{
3971    g_assert_not_reached();
3972}
3973#endif
3974