qemu/tcg/tcg.c
   1/*
   2 * Tiny Code Generator for QEMU
   3 *
   4 * Copyright (c) 2008 Fabrice Bellard
   5 *
   6 * Permission is hereby granted, free of charge, to any person obtaining a copy
   7 * of this software and associated documentation files (the "Software"), to deal
   8 * in the Software without restriction, including without limitation the rights
   9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10 * copies of the Software, and to permit persons to whom the Software is
  11 * furnished to do so, subject to the following conditions:
  12 *
  13 * The above copyright notice and this permission notice shall be included in
  14 * all copies or substantial portions of the Software.
  15 *
  16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22 * THE SOFTWARE.
  23 */
  24
  25/* define it to use liveness analysis (better code) */
  26#define USE_TCG_OPTIMIZATIONS
  27
  28#include "qemu/osdep.h"
  29
  30/* Define to dump the ELF file used to communicate with GDB.  */
  31#undef DEBUG_JIT
  32
  33#include "qemu/error-report.h"
  34#include "qemu/cutils.h"
  35#include "qemu/host-utils.h"
  36#include "qemu/qemu-print.h"
  37#include "qemu/timer.h"
  38
  39/* Note: the long term plan is to reduce the dependencies on the QEMU
  40   CPU definitions. Currently they are used for qemu_ld/st
  41   instructions */
  42#define NO_CPU_IO_DEFS
  43#include "cpu.h"
  44
  45#include "exec/exec-all.h"
  46
  47#if !defined(CONFIG_USER_ONLY)
  48#include "hw/boards.h"
  49#endif
  50
  51#include "tcg-op.h"
  52
  53#if UINTPTR_MAX == UINT32_MAX
  54# define ELF_CLASS  ELFCLASS32
  55#else
  56# define ELF_CLASS  ELFCLASS64
  57#endif
  58#ifdef HOST_WORDS_BIGENDIAN
  59# define ELF_DATA   ELFDATA2MSB
  60#else
  61# define ELF_DATA   ELFDATA2LSB
  62#endif
  63
  64#include "elf.h"
  65#include "exec/log.h"
  66#include "sysemu/sysemu.h"
  67
  68/* Forward declarations for functions declared in tcg-target.inc.c and
  69   used here. */
  70static void tcg_target_init(TCGContext *s);
  71static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
  72static void tcg_target_qemu_prologue(TCGContext *s);
  73static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
  74                        intptr_t value, intptr_t addend);
  75
  76/* The CIE and FDE header definitions will be common to all hosts.  */
  77typedef struct {
  78    uint32_t len __attribute__((aligned((sizeof(void *)))));
  79    uint32_t id;
  80    uint8_t version;
  81    char augmentation[1];
  82    uint8_t code_align;
  83    uint8_t data_align;
  84    uint8_t return_column;
  85} DebugFrameCIE;
  86
  87typedef struct QEMU_PACKED {
  88    uint32_t len __attribute__((aligned((sizeof(void *)))));
  89    uint32_t cie_offset;
  90    uintptr_t func_start;
  91    uintptr_t func_len;
  92} DebugFrameFDEHeader;
  93
  94typedef struct QEMU_PACKED {
  95    DebugFrameCIE cie;
  96    DebugFrameFDEHeader fde;
  97} DebugFrameHeader;
  98
  99static void tcg_register_jit_int(void *buf, size_t size,
 100                                 const void *debug_frame,
 101                                 size_t debug_frame_size)
 102    __attribute__((unused));
 103
 104/* Forward declarations for functions declared and used in tcg-target.inc.c. */
 105static const char *target_parse_constraint(TCGArgConstraint *ct,
 106                                           const char *ct_str, TCGType type);
 107static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
 108                       intptr_t arg2);
 109static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
 110static void tcg_out_movi(TCGContext *s, TCGType type,
 111                         TCGReg ret, tcg_target_long arg);
 112static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 113                       const int *const_args);
 114#if TCG_TARGET_MAYBE_vec
 115static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
 116                            TCGReg dst, TCGReg src);
 117static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
 118                             TCGReg dst, TCGReg base, intptr_t offset);
 119static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
 120                             TCGReg dst, tcg_target_long arg);
 121static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
 122                           unsigned vece, const TCGArg *args,
 123                           const int *const_args);
 124#else
 125static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
 126                                   TCGReg dst, TCGReg src)
 127{
 128    g_assert_not_reached();
 129}
 130static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
 131                                    TCGReg dst, TCGReg base, intptr_t offset)
 132{
 133    g_assert_not_reached();
 134}
 135static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
 136                                    TCGReg dst, tcg_target_long arg)
 137{
 138    g_assert_not_reached();
 139}
 140static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
 141                                  unsigned vece, const TCGArg *args,
 142                                  const int *const_args)
 143{
 144    g_assert_not_reached();
 145}
 146#endif
 147static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
 148                       intptr_t arg2);
 149static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
 150                        TCGReg base, intptr_t ofs);
 151static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
 152static int tcg_target_const_match(tcg_target_long val, TCGType type,
 153                                  const TCGArgConstraint *arg_ct);
 154#ifdef TCG_TARGET_NEED_LDST_LABELS
 155static int tcg_out_ldst_finalize(TCGContext *s);
 156#endif
 157
 158#define TCG_HIGHWATER 1024
 159
 160static TCGContext **tcg_ctxs;
 161static unsigned int n_tcg_ctxs;
 162TCGv_env cpu_env = 0;
 163
 164struct tcg_region_tree {
 165    QemuMutex lock;
 166    GTree *tree;
 167    /* padding to avoid false sharing is computed at run-time */
 168};
 169
 170/*
 171 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 172 * dynamically allocate from as demand dictates. Given appropriate region
 173 * sizing, this minimizes flushes even when some TCG threads generate a lot
 174 * more code than others.
 175 */
 176struct tcg_region_state {
 177    QemuMutex lock;
 178
 179    /* fields set at init time */
 180    void *start;
 181    void *start_aligned;
 182    void *end;
 183    size_t n;
 184    size_t size; /* size of one region */
 185    size_t stride; /* .size + guard size */
 186
 187    /* fields protected by the lock */
 188    size_t current; /* current region index */
 189    size_t agg_size_full; /* aggregate size of full regions */
 190};
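/*
 * For example, a buffer carved into four regions lays out as below, where
 * each G is a one-page guard (see tcg_region_init() for how .size and
 * .stride are computed; region 0 may be slightly larger, since it starts
 * at the possibly-unaligned buffer start):
 *
 *   start                                                      end
 *   |--region 0--|G|--region 1--|G|--region 2--|G|--region 3--|G|
 */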
 191
 192static struct tcg_region_state region;
 193/*
 194 * This is an array of struct tcg_region_tree's, with padding.
 195 * We use void * to simplify the computation of region_trees[i]; each
 196 * struct is found every tree_size bytes.
 197 */
 198static void *region_trees;
 199static size_t tree_size;
 200static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
 201static TCGRegSet tcg_target_call_clobber_regs;
 202
 203#if TCG_TARGET_INSN_UNIT_SIZE == 1
 204static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
 205{
 206    *s->code_ptr++ = v;
 207}
 208
 209static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
 210                                                      uint8_t v)
 211{
 212    *p = v;
 213}
 214#endif
 215
 216#if TCG_TARGET_INSN_UNIT_SIZE <= 2
 217static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
 218{
 219    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
 220        *s->code_ptr++ = v;
 221    } else {
 222        tcg_insn_unit *p = s->code_ptr;
 223        memcpy(p, &v, sizeof(v));
 224        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
 225    }
 226}
 227
 228static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
 229                                                       uint16_t v)
 230{
 231    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
 232        *p = v;
 233    } else {
 234        memcpy(p, &v, sizeof(v));
 235    }
 236}
 237#endif
 238
 239#if TCG_TARGET_INSN_UNIT_SIZE <= 4
 240static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
 241{
 242    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
 243        *s->code_ptr++ = v;
 244    } else {
 245        tcg_insn_unit *p = s->code_ptr;
 246        memcpy(p, &v, sizeof(v));
 247        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
 248    }
 249}
 250
 251static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
 252                                                       uint32_t v)
 253{
 254    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
 255        *p = v;
 256    } else {
 257        memcpy(p, &v, sizeof(v));
 258    }
 259}
 260#endif
 261
 262#if TCG_TARGET_INSN_UNIT_SIZE <= 8
 263static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
 264{
 265    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
 266        *s->code_ptr++ = v;
 267    } else {
 268        tcg_insn_unit *p = s->code_ptr;
 269        memcpy(p, &v, sizeof(v));
 270        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
 271    }
 272}
 273
 274static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
 275                                                       uint64_t v)
 276{
 277    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
 278        *p = v;
 279    } else {
 280        memcpy(p, &v, sizeof(v));
 281    }
 282}
 283#endif
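/*
 * Example: on a host where TCG_TARGET_INSN_UNIT_SIZE == 2, tcg_out32()
 * above takes the memcpy() path and advances code_ptr by 4 / 2 == 2 insn
 * units, while on a host with 4-byte units it is a single store.
 */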
 284
 285/* label relocation processing */
 286
 287static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
 288                          TCGLabel *l, intptr_t addend)
 289{
 290    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
 291
 292    r->type = type;
 293    r->ptr = code_ptr;
 294    r->addend = addend;
 295    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
 296}
 297
 298static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
 299{
 300    tcg_debug_assert(!l->has_value);
 301    l->has_value = 1;
 302    l->u.value_ptr = ptr;
 303}
 304
 305TCGLabel *gen_new_label(void)
 306{
 307    TCGContext *s = tcg_ctx;
 308    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
 309
 310    memset(l, 0, sizeof(TCGLabel));
 311    l->id = s->nb_labels++;
 312    QSIMPLEQ_INIT(&l->relocs);
 313
 314    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
 315
 316    return l;
 317}
 318
 319static bool tcg_resolve_relocs(TCGContext *s)
 320{
 321    TCGLabel *l;
 322
 323    QSIMPLEQ_FOREACH(l, &s->labels, next) {
 324        TCGRelocation *r;
 325        uintptr_t value = l->u.value;
 326
 327        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
 328            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
 329                return false;
 330            }
 331        }
 332    }
 333    return true;
 334}
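/*
 * A sketch of the label life cycle these helpers implement; R_EXAMPLE
 * stands in for a target-specific relocation type:
 */
#if 0
static void example_label_cycle(TCGContext *s)
{
    TCGLabel *l = gen_new_label();
    /* Emit a forward branch; queue a relocation against the label.  */
    tcg_out_reloc(s, s->code_ptr, R_EXAMPLE, l, 0);
    /* ... emit more code, then bind the label at the branch target.  */
    tcg_out_label(s, l, s->code_ptr);
    /* Once the whole TB is emitted, patch every queued relocation.  */
    tcg_resolve_relocs(s);
}
#endif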
 335
 336static void set_jmp_reset_offset(TCGContext *s, int which)
 337{
 338    size_t off = tcg_current_code_size(s);
 339    s->tb_jmp_reset_offset[which] = off;
 340    /* Make sure that we didn't overflow the stored offset.  */
 341    assert(s->tb_jmp_reset_offset[which] == off);
 342}
 343
 344#include "tcg-target.inc.c"
 345
 346/* compare a pointer @ptr and a tb_tc @s */
 347static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
 348{
 349    if (ptr >= s->ptr + s->size) {
 350        return 1;
 351    } else if (ptr < s->ptr) {
 352        return -1;
 353    }
 354    return 0;
 355}
 356
 357static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
 358{
 359    const struct tb_tc *a = ap;
 360    const struct tb_tc *b = bp;
 361
 362    /*
 363     * When both sizes are set, we know this isn't a lookup.
 364     * This is the most likely case: every TB must be inserted; lookups
 365     * are a lot less frequent.
 366     */
 367    if (likely(a->size && b->size)) {
 368        if (a->ptr > b->ptr) {
 369            return 1;
 370        } else if (a->ptr < b->ptr) {
 371            return -1;
 372        }
 373        /* a->ptr == b->ptr should happen only on deletions */
 374        g_assert(a->size == b->size);
 375        return 0;
 376    }
 377    /*
 378     * In all lookups, exactly one of the two .size fields is set to 0.
 379     * From the glib sources we see that @ap is always the lookup key. However
 380     * the docs provide no guarantee, so we just mark this case as likely.
 381     */
 382    if (likely(a->size == 0)) {
 383        return ptr_cmp_tb_tc(a->ptr, b);
 384    }
 385    return ptr_cmp_tb_tc(b->ptr, a);
 386}
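/*
 * Example: with an element { .ptr = p, .size = 64 } in the tree, the
 * lookup key { .ptr = p + 10, .size = 0 } compares equal to it through
 * ptr_cmp_tb_tc(), which is how tcg_tb_lookup() below resolves any host
 * PC within a TB's translated code to the TB itself.
 */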
 387
 388static void tcg_region_trees_init(void)
 389{
 390    size_t i;
 391
 392    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
 393    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
 394    for (i = 0; i < region.n; i++) {
 395        struct tcg_region_tree *rt = region_trees + i * tree_size;
 396
 397        qemu_mutex_init(&rt->lock);
 398        rt->tree = g_tree_new(tb_tc_cmp);
 399    }
 400}
 401
 402static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
 403{
 404    size_t region_idx;
 405
 406    if (p < region.start_aligned) {
 407        region_idx = 0;
 408    } else {
 409        ptrdiff_t offset = p - region.start_aligned;
 410
 411        if (offset > region.stride * (region.n - 1)) {
 412            region_idx = region.n - 1;
 413        } else {
 414            region_idx = offset / region.stride;
 415        }
 416    }
 417    return region_trees + region_idx * tree_size;
 418}
 419
 420void tcg_tb_insert(TranslationBlock *tb)
 421{
 422    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
 423
 424    qemu_mutex_lock(&rt->lock);
 425    g_tree_insert(rt->tree, &tb->tc, tb);
 426    qemu_mutex_unlock(&rt->lock);
 427}
 428
 429void tcg_tb_remove(TranslationBlock *tb)
 430{
 431    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
 432
 433    qemu_mutex_lock(&rt->lock);
 434    g_tree_remove(rt->tree, &tb->tc);
 435    qemu_mutex_unlock(&rt->lock);
 436}
 437
 438/*
 439 * Find the TB 'tb' such that
 440 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 441 * Return NULL if not found.
 442 */
 443TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
 444{
 445    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
 446    TranslationBlock *tb;
 447    struct tb_tc s = { .ptr = (void *)tc_ptr };
 448
 449    qemu_mutex_lock(&rt->lock);
 450    tb = g_tree_lookup(rt->tree, &s);
 451    qemu_mutex_unlock(&rt->lock);
 452    return tb;
 453}
 454
 455static void tcg_region_tree_lock_all(void)
 456{
 457    size_t i;
 458
 459    for (i = 0; i < region.n; i++) {
 460        struct tcg_region_tree *rt = region_trees + i * tree_size;
 461
 462        qemu_mutex_lock(&rt->lock);
 463    }
 464}
 465
 466static void tcg_region_tree_unlock_all(void)
 467{
 468    size_t i;
 469
 470    for (i = 0; i < region.n; i++) {
 471        struct tcg_region_tree *rt = region_trees + i * tree_size;
 472
 473        qemu_mutex_unlock(&rt->lock);
 474    }
 475}
 476
 477void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
 478{
 479    size_t i;
 480
 481    tcg_region_tree_lock_all();
 482    for (i = 0; i < region.n; i++) {
 483        struct tcg_region_tree *rt = region_trees + i * tree_size;
 484
 485        g_tree_foreach(rt->tree, func, user_data);
 486    }
 487    tcg_region_tree_unlock_all();
 488}
 489
 490size_t tcg_nb_tbs(void)
 491{
 492    size_t nb_tbs = 0;
 493    size_t i;
 494
 495    tcg_region_tree_lock_all();
 496    for (i = 0; i < region.n; i++) {
 497        struct tcg_region_tree *rt = region_trees + i * tree_size;
 498
 499        nb_tbs += g_tree_nnodes(rt->tree);
 500    }
 501    tcg_region_tree_unlock_all();
 502    return nb_tbs;
 503}
 504
 505static void tcg_region_tree_reset_all(void)
 506{
 507    size_t i;
 508
 509    tcg_region_tree_lock_all();
 510    for (i = 0; i < region.n; i++) {
 511        struct tcg_region_tree *rt = region_trees + i * tree_size;
 512
 513        /* Increment the refcount first so that destroy acts as a reset */
 514        g_tree_ref(rt->tree);
 515        g_tree_destroy(rt->tree);
 516    }
 517    tcg_region_tree_unlock_all();
 518}
 519
 520static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
 521{
 522    void *start, *end;
 523
 524    start = region.start_aligned + curr_region * region.stride;
 525    end = start + region.size;
 526
 527    if (curr_region == 0) {
 528        start = region.start;
 529    }
 530    if (curr_region == region.n - 1) {
 531        end = region.end;
 532    }
 533
 534    *pstart = start;
 535    *pend = end;
 536}
 537
 538static void tcg_region_assign(TCGContext *s, size_t curr_region)
 539{
 540    void *start, *end;
 541
 542    tcg_region_bounds(curr_region, &start, &end);
 543
 544    s->code_gen_buffer = start;
 545    s->code_gen_ptr = start;
 546    s->code_gen_buffer_size = end - start;
 547    s->code_gen_highwater = end - TCG_HIGHWATER;
 548}
 549
 550static bool tcg_region_alloc__locked(TCGContext *s)
 551{
 552    if (region.current == region.n) {
 553        return true;
 554    }
 555    tcg_region_assign(s, region.current);
 556    region.current++;
 557    return false;
 558}
 559
 560/*
 561 * Request a new region once the one in use has filled up.
 562 * Returns true on error.
 563 */
 564static bool tcg_region_alloc(TCGContext *s)
 565{
 566    bool err;
 567    /* read the region size now; alloc__locked will overwrite it on success */
 568    size_t size_full = s->code_gen_buffer_size;
 569
 570    qemu_mutex_lock(&region.lock);
 571    err = tcg_region_alloc__locked(s);
 572    if (!err) {
 573        region.agg_size_full += size_full - TCG_HIGHWATER;
 574    }
 575    qemu_mutex_unlock(&region.lock);
 576    return err;
 577}
 578
 579/*
 580 * Perform a context's first region allocation.
 581 * This function does _not_ increment region.agg_size_full.
 582 */
 583static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
 584{
 585    return tcg_region_alloc__locked(s);
 586}
 587
 588/* Call from a safe-work context */
 589void tcg_region_reset_all(void)
 590{
 591    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
 592    unsigned int i;
 593
 594    qemu_mutex_lock(&region.lock);
 595    region.current = 0;
 596    region.agg_size_full = 0;
 597
 598    for (i = 0; i < n_ctxs; i++) {
 599        TCGContext *s = atomic_read(&tcg_ctxs[i]);
 600        bool err = tcg_region_initial_alloc__locked(s);
 601
 602        g_assert(!err);
 603    }
 604    qemu_mutex_unlock(&region.lock);
 605
 606    tcg_region_tree_reset_all();
 607}
 608
 609#ifdef CONFIG_USER_ONLY
 610static size_t tcg_n_regions(void)
 611{
 612    return 1;
 613}
 614#else
 615/*
 616 * It is likely that some vCPUs will translate more code than others, so we
 617 * first try to set more regions than max_cpus, with those regions being of
 618 * reasonable size. If that's not possible we make do by evenly dividing
 619 * the code_gen_buffer among the vCPUs.
 620 */
 621static size_t tcg_n_regions(void)
 622{
 623    size_t i;
 624
 625    /* Use a single region if all we have is one vCPU thread */
 626#if !defined(CONFIG_USER_ONLY)
 627    MachineState *ms = MACHINE(qdev_get_machine());
 628    unsigned int max_cpus = ms->smp.max_cpus;
 629#endif
 630    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
 631        return 1;
 632    }
 633
 634    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
 635    for (i = 8; i > 0; i--) {
 636        size_t regions_per_thread = i;
 637        size_t region_size;
 638
 639        region_size = tcg_init_ctx.code_gen_buffer_size;
 640        region_size /= max_cpus * regions_per_thread;
 641
 642        if (region_size >= 2 * 1024u * 1024) {
 643            return max_cpus * regions_per_thread;
 644        }
 645    }
 646    /* If we can't, then just allocate one region per vCPU thread */
 647    return max_cpus;
 648}
 649#endif
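/*
 * Worked example: with a 512 MiB code_gen_buffer and max_cpus == 8, the
 * first iteration (i == 8) yields 512 MiB / (8 * 8) = 8 MiB per region,
 * which already meets the >= 2 MiB bound, so 64 regions are used.
 */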
 650
 651/*
 652 * Initializes region partitioning.
 653 *
 654 * Called at init time from the parent thread (i.e. the one calling
 655 * tcg_context_init), after the target's TCG globals have been set.
 656 *
 657 * Region partitioning works by splitting code_gen_buffer into separate regions,
 658 * and then assigning regions to TCG threads so that the threads can translate
 659 * code in parallel without synchronization.
 660 *
 661 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 662 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 663 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 664 * must have been parsed before calling this function, since it calls
 665 * qemu_tcg_mttcg_enabled().
 666 *
 667 * In user-mode we use a single region.  Having multiple regions in user-mode
 668 * is not supported, because the number of vCPU threads (recall that each thread
 669 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 670 * OS, and usually this number is huge (tens of thousands is not uncommon).
 671 * Thus, given this large bound on the number of vCPU threads and the fact
 672 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 673 * the availability of at least one region per vCPU thread.
 674 *
 675 * However, this user-mode limitation is unlikely to be a significant problem
 676 * in practice. Multi-threaded guests share most if not all of their translated
 677 * code, which makes parallel code generation less appealing than in softmmu.
 678 */
 679void tcg_region_init(void)
 680{
 681    void *buf = tcg_init_ctx.code_gen_buffer;
 682    void *aligned;
 683    size_t size = tcg_init_ctx.code_gen_buffer_size;
 684    size_t page_size = qemu_real_host_page_size;
 685    size_t region_size;
 686    size_t n_regions;
 687    size_t i;
 688
 689    n_regions = tcg_n_regions();
 690
 691    /* The first region will be 'aligned - buf' bytes larger than the others */
 692    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
 693    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
 694    /*
 695     * Make region_size a multiple of page_size, using aligned as the start.
 696     * As a result of this we might end up with a few extra pages at the end of
 697     * the buffer; we will assign those to the last region.
 698     */
 699    region_size = (size - (aligned - buf)) / n_regions;
 700    region_size = QEMU_ALIGN_DOWN(region_size, page_size);
 701
 702    /* A region must have at least 2 pages; one code, one guard */
 703    g_assert(region_size >= 2 * page_size);
 704
 705    /* init the region struct */
 706    qemu_mutex_init(&region.lock);
 707    region.n = n_regions;
 708    region.size = region_size - page_size;
 709    region.stride = region_size;
 710    region.start = buf;
 711    region.start_aligned = aligned;
 712    /* page-align the end, since its last page will be a guard page */
 713    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
 714    /* account for that last guard page */
 715    region.end -= page_size;
 716
 717    /* set guard pages */
 718    for (i = 0; i < region.n; i++) {
 719        void *start, *end;
 720        int rc;
 721
 722        tcg_region_bounds(i, &start, &end);
 723        rc = qemu_mprotect_none(end, page_size);
 724        g_assert(!rc);
 725    }
 726
 727    tcg_region_trees_init();
 728
 729    /* In user-mode we support only one ctx, so do the initial allocation now */
 730#ifdef CONFIG_USER_ONLY
 731    {
 732        bool err = tcg_region_initial_alloc__locked(tcg_ctx);
 733
 734        g_assert(!err);
 735    }
 736#endif
 737}
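/*
 * Worked example for the code above, assuming 4 KiB pages: a page-aligned
 * 32 MiB buffer split into 4 regions gives region_size == 8 MiB, so each
 * region spans a stride of 8 MiB, of which 8 MiB - 4 KiB is usable code
 * space and the final page is the guard.
 */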
 738
 739static void alloc_tcg_plugin_context(TCGContext *s)
 740{
 741#ifdef CONFIG_PLUGIN
 742    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
 743    s->plugin_tb->insns =
 744        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
 745#endif
 746}
 747
 748/*
 749 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 750 * and registered the target's TCG globals) must register with this function
 751 * before initiating translation.
 752 *
 753 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 754 * of tcg_region_init() for the reasoning behind this.
 755 *
 756 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 757 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 758 * is not used anymore for translation once this function is called.
 759 *
 760 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 761 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
 762 */
 763#ifdef CONFIG_USER_ONLY
 764void tcg_register_thread(void)
 765{
 766    tcg_ctx = &tcg_init_ctx;
 767}
 768#else
 769void tcg_register_thread(void)
 770{
 771    MachineState *ms = MACHINE(qdev_get_machine());
 772    TCGContext *s = g_malloc(sizeof(*s));
 773    unsigned int i, n;
 774    bool err;
 775
 776    *s = tcg_init_ctx;
 777
 778    /* Relink mem_base.  */
 779    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
 780        if (tcg_init_ctx.temps[i].mem_base) {
 781            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
 782            tcg_debug_assert(b >= 0 && b < n);
 783            s->temps[i].mem_base = &s->temps[b];
 784        }
 785    }
 786
 787    /* Claim an entry in tcg_ctxs */
 788    n = atomic_fetch_inc(&n_tcg_ctxs);
 789    g_assert(n < ms->smp.max_cpus);
 790    atomic_set(&tcg_ctxs[n], s);
 791
 792    if (n > 0) {
 793        alloc_tcg_plugin_context(s);
 794    }
 795
 796    tcg_ctx = s;
 797    qemu_mutex_lock(&region.lock);
 798    err = tcg_region_initial_alloc__locked(tcg_ctx);
 799    g_assert(!err);
 800    qemu_mutex_unlock(&region.lock);
 801}
 802#endif /* !CONFIG_USER_ONLY */
 803
 804/*
 805 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 806 * currently in the cache.
 807 * See also: tcg_code_capacity()
 808 * Do not confuse with tcg_current_code_size(); that one applies to a single
 809 * TCG context.
 810 */
 811size_t tcg_code_size(void)
 812{
 813    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
 814    unsigned int i;
 815    size_t total;
 816
 817    qemu_mutex_lock(&region.lock);
 818    total = region.agg_size_full;
 819    for (i = 0; i < n_ctxs; i++) {
 820        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
 821        size_t size;
 822
 823        size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
 824        g_assert(size <= s->code_gen_buffer_size);
 825        total += size;
 826    }
 827    qemu_mutex_unlock(&region.lock);
 828    return total;
 829}
 830
 831/*
 832 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 833 * regions.
 834 * See also: tcg_code_size()
 835 */
 836size_t tcg_code_capacity(void)
 837{
 838    size_t guard_size, capacity;
 839
 840    /* no need for synchronization; these variables are set at init time */
 841    guard_size = region.stride - region.size;
 842    capacity = region.end + guard_size - region.start;
 843    capacity -= region.n * (guard_size + TCG_HIGHWATER);
 844    return capacity;
 845}
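/*
 * Example: with 4 regions, a stride of 8 MiB and a 4 KiB guard page each,
 * capacity is the total span minus 4 * (4 KiB + TCG_HIGHWATER) bytes,
 * i.e. neither the guards nor the per-region high-water slack count as
 * usable code space.
 */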
 846
 847size_t tcg_tb_phys_invalidate_count(void)
 848{
 849    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
 850    unsigned int i;
 851    size_t total = 0;
 852
 853    for (i = 0; i < n_ctxs; i++) {
 854        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
 855
 856        total += atomic_read(&s->tb_phys_invalidate_count);
 857    }
 858    return total;
 859}
 860
 861/* pool based memory allocation */
 862void *tcg_malloc_internal(TCGContext *s, int size)
 863{
 864    TCGPool *p;
 865    int pool_size;
 866
 867    if (size > TCG_POOL_CHUNK_SIZE) {
 868        /* big malloc: insert a new pool (XXX: could optimize) */
 869        p = g_malloc(sizeof(TCGPool) + size);
 870        p->size = size;
 871        p->next = s->pool_first_large;
 872        s->pool_first_large = p;
 873        return p->data;
 874    } else {
 875        p = s->pool_current;
 876        if (!p) {
 877            p = s->pool_first;
 878            if (!p)
 879                goto new_pool;
 880        } else {
 881            if (!p->next) {
 882            new_pool:
 883                pool_size = TCG_POOL_CHUNK_SIZE;
 884                p = g_malloc(sizeof(TCGPool) + pool_size);
 885                p->size = pool_size;
 886                p->next = NULL;
 887                if (s->pool_current)
 888                    s->pool_current->next = p;
 889                else
 890                    s->pool_first = p;
 891            } else {
 892                p = p->next;
 893            }
 894        }
 895    }
 896    s->pool_current = p;
 897    s->pool_cur = p->data + size;
 898    s->pool_end = p->data + p->size;
 899    return p->data;
 900}
 901
 902void tcg_pool_reset(TCGContext *s)
 903{
 904    TCGPool *p, *t;
 905    for (p = s->pool_first_large; p; p = t) {
 906        t = p->next;
 907        g_free(p);
 908    }
 909    s->pool_first_large = NULL;
 910    s->pool_cur = s->pool_end = NULL;
 911    s->pool_current = NULL;
 912}
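/*
 * A sketch of the pool's per-TB lifetime; the caller name is hypothetical:
 */
#if 0
static void example_translate_one_tb(TCGContext *s)
{
    tcg_func_start(s);              /* calls tcg_pool_reset() */
    TCGLabel *l = gen_new_label();  /* tcg_malloc()-backed pool memory */
    (void)l;                        /* recycled in bulk by the next reset */
}
#endif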
 913
 914typedef struct TCGHelperInfo {
 915    void *func;
 916    const char *name;
 917    unsigned flags;
 918    unsigned sizemask;
 919} TCGHelperInfo;
 920
 921#include "exec/helper-proto.h"
 922
 923static const TCGHelperInfo all_helpers[] = {
 924#include "exec/helper-tcg.h"
 925};
 926static GHashTable *helper_table;
 927
 928static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
 929static void process_op_defs(TCGContext *s);
 930static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
 931                                            TCGReg reg, const char *name);
 932
 933void tcg_context_init(TCGContext *s)
 934{
 935    int op, total_args, n, i;
 936    TCGOpDef *def;
 937    TCGArgConstraint *args_ct;
 938    int *sorted_args;
 939    TCGTemp *ts;
 940
 941    memset(s, 0, sizeof(*s));
 942    s->nb_globals = 0;
 943
 944    /* Count total number of arguments and allocate the corresponding
 945       space */
 946    total_args = 0;
 947    for(op = 0; op < NB_OPS; op++) {
 948        def = &tcg_op_defs[op];
 949        n = def->nb_iargs + def->nb_oargs;
 950        total_args += n;
 951    }
 952
 953    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
 954    sorted_args = g_malloc(sizeof(int) * total_args);
 955
 956    for(op = 0; op < NB_OPS; op++) {
 957        def = &tcg_op_defs[op];
 958        def->args_ct = args_ct;
 959        def->sorted_args = sorted_args;
 960        n = def->nb_iargs + def->nb_oargs;
 961        sorted_args += n;
 962        args_ct += n;
 963    }
 964
 965    /* Register helpers.  */
 966    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
 967    helper_table = g_hash_table_new(NULL, NULL);
 968
 969    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
 970        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
 971                            (gpointer)&all_helpers[i]);
 972    }
 973
 974    tcg_target_init(s);
 975    process_op_defs(s);
 976
 977    /* Reverse the order of the saved registers, assuming they're all at
 978       the start of tcg_target_reg_alloc_order.  */
 979    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
 980        int r = tcg_target_reg_alloc_order[n];
 981        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
 982            break;
 983        }
 984    }
 985    for (i = 0; i < n; ++i) {
 986        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
 987    }
 988    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
 989        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
 990    }
 991
 992    alloc_tcg_plugin_context(s);
 993
 994    tcg_ctx = s;
 995    /*
 996     * In user-mode we simply share the init context among threads, since we
 997     * use a single region. See the documentation of tcg_region_init() for the
 998     * reasoning behind this.
 999     * In softmmu we will have at most max_cpus TCG threads.
1000     */
1001#ifdef CONFIG_USER_ONLY
1002    tcg_ctxs = &tcg_ctx;
1003    n_tcg_ctxs = 1;
1004#else
1005    MachineState *ms = MACHINE(qdev_get_machine());
1006    unsigned int max_cpus = ms->smp.max_cpus;
1007    tcg_ctxs = g_new(TCGContext *, max_cpus);
1008#endif
1009
1010    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1011    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1012    cpu_env = temp_tcgv_ptr(ts);
1013}
1014
1015/*
1016 * Allocate TBs right before their corresponding translated code, making
1017 * sure that TBs and code are on different cache lines.
1018 */
1019TranslationBlock *tcg_tb_alloc(TCGContext *s)
1020{
1021    uintptr_t align = qemu_icache_linesize;
1022    TranslationBlock *tb;
1023    void *next;
1024
1025 retry:
1026    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1027    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1028
1029    if (unlikely(next > s->code_gen_highwater)) {
1030        if (tcg_region_alloc(s)) {
1031            return NULL;
1032        }
1033        goto retry;
1034    }
1035    atomic_set(&s->code_gen_ptr, next);
1036    s->data_gen_ptr = NULL;
1037    return tb;
1038}
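/*
 * Example: with a 64-byte icache line and code_gen_ptr == base + 0x1234,
 * the TB lands at base + 0x1240 and code_gen_ptr advances to the first
 * 64-byte boundary past the TranslationBlock, where the translated code
 * for this TB will then be emitted.
 */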
1039
1040void tcg_prologue_init(TCGContext *s)
1041{
1042    size_t prologue_size, total_size;
1043    void *buf0, *buf1;
1044
1045    /* Put the prologue at the beginning of code_gen_buffer.  */
1046    buf0 = s->code_gen_buffer;
1047    total_size = s->code_gen_buffer_size;
1048    s->code_ptr = buf0;
1049    s->code_buf = buf0;
1050    s->data_gen_ptr = NULL;
1051    s->code_gen_prologue = buf0;
1052
1053    /* Compute a high-water mark, at which we voluntarily flush the buffer
1054       and start over.  The size here is arbitrary, significantly larger
1055       than we expect the code generation for any one opcode to require.  */
1056    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1057
1058#ifdef TCG_TARGET_NEED_POOL_LABELS
1059    s->pool_labels = NULL;
1060#endif
1061
1062    /* Generate the prologue.  */
1063    tcg_target_qemu_prologue(s);
1064
1065#ifdef TCG_TARGET_NEED_POOL_LABELS
1066    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1067    {
1068        int result = tcg_out_pool_finalize(s);
1069        tcg_debug_assert(result == 0);
1070    }
1071#endif
1072
1073    buf1 = s->code_ptr;
1074    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1075
1076    /* Deduct the prologue from the buffer.  */
1077    prologue_size = tcg_current_code_size(s);
1078    s->code_gen_ptr = buf1;
1079    s->code_gen_buffer = buf1;
1080    s->code_buf = buf1;
1081    total_size -= prologue_size;
1082    s->code_gen_buffer_size = total_size;
1083
1084    tcg_register_jit(s->code_gen_buffer, total_size);
1085
1086#ifdef DEBUG_DISAS
1087    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1088        qemu_log_lock();
1089        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1090        if (s->data_gen_ptr) {
1091            size_t code_size = s->data_gen_ptr - buf0;
1092            size_t data_size = prologue_size - code_size;
1093            size_t i;
1094
1095            log_disas(buf0, code_size);
1096
1097            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1098                if (sizeof(tcg_target_ulong) == 8) {
1099                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1100                             (uintptr_t)s->data_gen_ptr + i,
1101                             *(uint64_t *)(s->data_gen_ptr + i));
1102                } else {
1103                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1104                             (uintptr_t)s->data_gen_ptr + i,
1105                             *(uint32_t *)(s->data_gen_ptr + i));
1106                }
1107            }
1108        } else {
1109            log_disas(buf0, prologue_size);
1110        }
1111        qemu_log("\n");
1112        qemu_log_flush();
1113        qemu_log_unlock();
1114    }
1115#endif
1116
1117    /* Assert that goto_ptr is implemented completely.  */
1118    if (TCG_TARGET_HAS_goto_ptr) {
1119        tcg_debug_assert(s->code_gen_epilogue != NULL);
1120    }
1121}
1122
1123void tcg_func_start(TCGContext *s)
1124{
1125    tcg_pool_reset(s);
1126    s->nb_temps = s->nb_globals;
1127
1128    /* No temps have been previously allocated for size or locality.  */
1129    memset(s->free_temps, 0, sizeof(s->free_temps));
1130
1131    s->nb_ops = 0;
1132    s->nb_labels = 0;
1133    s->current_frame_offset = s->frame_start;
1134
1135#ifdef CONFIG_DEBUG_TCG
1136    s->goto_tb_issue_mask = 0;
1137#endif
1138
1139    QTAILQ_INIT(&s->ops);
1140    QTAILQ_INIT(&s->free_ops);
1141    QSIMPLEQ_INIT(&s->labels);
1142}
1143
1144static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1145{
1146    int n = s->nb_temps++;
1147    tcg_debug_assert(n < TCG_MAX_TEMPS);
1148    return memset(&s->temps[n], 0, sizeof(TCGTemp));
1149}
1150
1151static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1152{
1153    TCGTemp *ts;
1154
1155    tcg_debug_assert(s->nb_globals == s->nb_temps);
1156    s->nb_globals++;
1157    ts = tcg_temp_alloc(s);
1158    ts->temp_global = 1;
1159
1160    return ts;
1161}
1162
1163static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1164                                            TCGReg reg, const char *name)
1165{
1166    TCGTemp *ts;
1167
1168    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1169        tcg_abort();
1170    }
1171
1172    ts = tcg_global_alloc(s);
1173    ts->base_type = type;
1174    ts->type = type;
1175    ts->fixed_reg = 1;
1176    ts->reg = reg;
1177    ts->name = name;
1178    tcg_regset_set_reg(s->reserved_regs, reg);
1179
1180    return ts;
1181}
1182
1183void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1184{
1185    s->frame_start = start;
1186    s->frame_end = start + size;
1187    s->frame_temp
1188        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1189}
1190
1191TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1192                                     intptr_t offset, const char *name)
1193{
1194    TCGContext *s = tcg_ctx;
1195    TCGTemp *base_ts = tcgv_ptr_temp(base);
1196    TCGTemp *ts = tcg_global_alloc(s);
1197    int indirect_reg = 0, bigendian = 0;
1198#ifdef HOST_WORDS_BIGENDIAN
1199    bigendian = 1;
1200#endif
1201
1202    if (!base_ts->fixed_reg) {
1203        /* We do not support double-indirect registers.  */
1204        tcg_debug_assert(!base_ts->indirect_reg);
1205        base_ts->indirect_base = 1;
1206        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1207                            ? 2 : 1);
1208        indirect_reg = 1;
1209    }
1210
1211    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1212        TCGTemp *ts2 = tcg_global_alloc(s);
1213        char buf[64];
1214
1215        ts->base_type = TCG_TYPE_I64;
1216        ts->type = TCG_TYPE_I32;
1217        ts->indirect_reg = indirect_reg;
1218        ts->mem_allocated = 1;
1219        ts->mem_base = base_ts;
1220        ts->mem_offset = offset + bigendian * 4;
1221        pstrcpy(buf, sizeof(buf), name);
1222        pstrcat(buf, sizeof(buf), "_0");
1223        ts->name = strdup(buf);
1224
1225        tcg_debug_assert(ts2 == ts + 1);
1226        ts2->base_type = TCG_TYPE_I64;
1227        ts2->type = TCG_TYPE_I32;
1228        ts2->indirect_reg = indirect_reg;
1229        ts2->mem_allocated = 1;
1230        ts2->mem_base = base_ts;
1231        ts2->mem_offset = offset + (1 - bigendian) * 4;
1232        pstrcpy(buf, sizeof(buf), name);
1233        pstrcat(buf, sizeof(buf), "_1");
1234        ts2->name = strdup(buf);
1235    } else {
1236        ts->base_type = type;
1237        ts->type = type;
1238        ts->indirect_reg = indirect_reg;
1239        ts->mem_allocated = 1;
1240        ts->mem_base = base_ts;
1241        ts->mem_offset = offset;
1242        ts->name = name;
1243    }
1244    return ts;
1245}
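/*
 * A sketch of how a target front end uses this through the typed wrappers
 * in tcg.h; the CPUArchState field and register name are made-up examples:
 */
#if 0
static void example_register_globals(void)
{
    TCGv_i32 r0 = tcg_global_mem_new_i32(cpu_env,
                                         offsetof(CPUArchState, regs[0]),
                                         "r0");
    (void)r0;
}
#endif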
1246
1247TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1248{
1249    TCGContext *s = tcg_ctx;
1250    TCGTemp *ts;
1251    int idx, k;
1252
1253    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1254    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1255    if (idx < TCG_MAX_TEMPS) {
1256        /* There is already an available temp with the right type.  */
1257        clear_bit(idx, s->free_temps[k].l);
1258
1259        ts = &s->temps[idx];
1260        ts->temp_allocated = 1;
1261        tcg_debug_assert(ts->base_type == type);
1262        tcg_debug_assert(ts->temp_local == temp_local);
1263    } else {
1264        ts = tcg_temp_alloc(s);
1265        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1266            TCGTemp *ts2 = tcg_temp_alloc(s);
1267
1268            ts->base_type = type;
1269            ts->type = TCG_TYPE_I32;
1270            ts->temp_allocated = 1;
1271            ts->temp_local = temp_local;
1272
1273            tcg_debug_assert(ts2 == ts + 1);
1274            ts2->base_type = TCG_TYPE_I64;
1275            ts2->type = TCG_TYPE_I32;
1276            ts2->temp_allocated = 1;
1277            ts2->temp_local = temp_local;
1278        } else {
1279            ts->base_type = type;
1280            ts->type = type;
1281            ts->temp_allocated = 1;
1282            ts->temp_local = temp_local;
1283        }
1284    }
1285
1286#if defined(CONFIG_DEBUG_TCG)
1287    s->temps_in_use++;
1288#endif
1289    return ts;
1290}
1291
1292TCGv_vec tcg_temp_new_vec(TCGType type)
1293{
1294    TCGTemp *t;
1295
1296#ifdef CONFIG_DEBUG_TCG
1297    switch (type) {
1298    case TCG_TYPE_V64:
1299        assert(TCG_TARGET_HAS_v64);
1300        break;
1301    case TCG_TYPE_V128:
1302        assert(TCG_TARGET_HAS_v128);
1303        break;
1304    case TCG_TYPE_V256:
1305        assert(TCG_TARGET_HAS_v256);
1306        break;
1307    default:
1308        g_assert_not_reached();
1309    }
1310#endif
1311
1312    t = tcg_temp_new_internal(type, 0);
1313    return temp_tcgv_vec(t);
1314}
1315
1316/* Create a new temp of the same type as an existing temp.  */
1317TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1318{
1319    TCGTemp *t = tcgv_vec_temp(match);
1320
1321    tcg_debug_assert(t->temp_allocated != 0);
1322
1323    t = tcg_temp_new_internal(t->base_type, 0);
1324    return temp_tcgv_vec(t);
1325}
1326
1327void tcg_temp_free_internal(TCGTemp *ts)
1328{
1329    TCGContext *s = tcg_ctx;
1330    int k, idx;
1331
1332#if defined(CONFIG_DEBUG_TCG)
1333    s->temps_in_use--;
1334    if (s->temps_in_use < 0) {
1335        fprintf(stderr, "More temporaries freed than allocated!\n");
1336    }
1337#endif
1338
1339    tcg_debug_assert(ts->temp_global == 0);
1340    tcg_debug_assert(ts->temp_allocated != 0);
1341    ts->temp_allocated = 0;
1342
1343    idx = temp_idx(ts);
1344    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1345    set_bit(idx, s->free_temps[k].l);
1346}
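/*
 * Example: freeing a local TCG_TYPE_I64 temp sets its bit in
 * free_temps[TCG_TYPE_I64 + TCG_TYPE_COUNT], and the next
 * tcg_temp_new_internal(TCG_TYPE_I64, true) pops it from that same
 * bucket instead of allocating a fresh TCGTemp.
 */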
1347
1348TCGv_i32 tcg_const_i32(int32_t val)
1349{
1350    TCGv_i32 t0;
1351    t0 = tcg_temp_new_i32();
1352    tcg_gen_movi_i32(t0, val);
1353    return t0;
1354}
1355
1356TCGv_i64 tcg_const_i64(int64_t val)
1357{
1358    TCGv_i64 t0;
1359    t0 = tcg_temp_new_i64();
1360    tcg_gen_movi_i64(t0, val);
1361    return t0;
1362}
1363
1364TCGv_i32 tcg_const_local_i32(int32_t val)
1365{
1366    TCGv_i32 t0;
1367    t0 = tcg_temp_local_new_i32();
1368    tcg_gen_movi_i32(t0, val);
1369    return t0;
1370}
1371
1372TCGv_i64 tcg_const_local_i64(int64_t val)
1373{
1374    TCGv_i64 t0;
1375    t0 = tcg_temp_local_new_i64();
1376    tcg_gen_movi_i64(t0, val);
1377    return t0;
1378}
1379
1380#if defined(CONFIG_DEBUG_TCG)
1381void tcg_clear_temp_count(void)
1382{
1383    TCGContext *s = tcg_ctx;
1384    s->temps_in_use = 0;
1385}
1386
1387int tcg_check_temp_count(void)
1388{
1389    TCGContext *s = tcg_ctx;
1390    if (s->temps_in_use) {
1391        /* Clear the count so that we don't give another
1392         * warning immediately next time around.
1393         */
1394        s->temps_in_use = 0;
1395        return 1;
1396    }
1397    return 0;
1398}
1399#endif
1400
1401/* Return true if OP may appear in the opcode stream.
1402   Test the runtime variable that controls each opcode.  */
1403bool tcg_op_supported(TCGOpcode op)
1404{
1405    const bool have_vec
1406        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1407
1408    switch (op) {
1409    case INDEX_op_discard:
1410    case INDEX_op_set_label:
1411    case INDEX_op_call:
1412    case INDEX_op_br:
1413    case INDEX_op_mb:
1414    case INDEX_op_insn_start:
1415    case INDEX_op_exit_tb:
1416    case INDEX_op_goto_tb:
1417    case INDEX_op_qemu_ld_i32:
1418    case INDEX_op_qemu_st_i32:
1419    case INDEX_op_qemu_ld_i64:
1420    case INDEX_op_qemu_st_i64:
1421        return true;
1422
1423    case INDEX_op_goto_ptr:
1424        return TCG_TARGET_HAS_goto_ptr;
1425
1426    case INDEX_op_mov_i32:
1427    case INDEX_op_movi_i32:
1428    case INDEX_op_setcond_i32:
1429    case INDEX_op_brcond_i32:
1430    case INDEX_op_ld8u_i32:
1431    case INDEX_op_ld8s_i32:
1432    case INDEX_op_ld16u_i32:
1433    case INDEX_op_ld16s_i32:
1434    case INDEX_op_ld_i32:
1435    case INDEX_op_st8_i32:
1436    case INDEX_op_st16_i32:
1437    case INDEX_op_st_i32:
1438    case INDEX_op_add_i32:
1439    case INDEX_op_sub_i32:
1440    case INDEX_op_mul_i32:
1441    case INDEX_op_and_i32:
1442    case INDEX_op_or_i32:
1443    case INDEX_op_xor_i32:
1444    case INDEX_op_shl_i32:
1445    case INDEX_op_shr_i32:
1446    case INDEX_op_sar_i32:
1447        return true;
1448
1449    case INDEX_op_movcond_i32:
1450        return TCG_TARGET_HAS_movcond_i32;
1451    case INDEX_op_div_i32:
1452    case INDEX_op_divu_i32:
1453        return TCG_TARGET_HAS_div_i32;
1454    case INDEX_op_rem_i32:
1455    case INDEX_op_remu_i32:
1456        return TCG_TARGET_HAS_rem_i32;
1457    case INDEX_op_div2_i32:
1458    case INDEX_op_divu2_i32:
1459        return TCG_TARGET_HAS_div2_i32;
1460    case INDEX_op_rotl_i32:
1461    case INDEX_op_rotr_i32:
1462        return TCG_TARGET_HAS_rot_i32;
1463    case INDEX_op_deposit_i32:
1464        return TCG_TARGET_HAS_deposit_i32;
1465    case INDEX_op_extract_i32:
1466        return TCG_TARGET_HAS_extract_i32;
1467    case INDEX_op_sextract_i32:
1468        return TCG_TARGET_HAS_sextract_i32;
1469    case INDEX_op_extract2_i32:
1470        return TCG_TARGET_HAS_extract2_i32;
1471    case INDEX_op_add2_i32:
1472        return TCG_TARGET_HAS_add2_i32;
1473    case INDEX_op_sub2_i32:
1474        return TCG_TARGET_HAS_sub2_i32;
1475    case INDEX_op_mulu2_i32:
1476        return TCG_TARGET_HAS_mulu2_i32;
1477    case INDEX_op_muls2_i32:
1478        return TCG_TARGET_HAS_muls2_i32;
1479    case INDEX_op_muluh_i32:
1480        return TCG_TARGET_HAS_muluh_i32;
1481    case INDEX_op_mulsh_i32:
1482        return TCG_TARGET_HAS_mulsh_i32;
1483    case INDEX_op_ext8s_i32:
1484        return TCG_TARGET_HAS_ext8s_i32;
1485    case INDEX_op_ext16s_i32:
1486        return TCG_TARGET_HAS_ext16s_i32;
1487    case INDEX_op_ext8u_i32:
1488        return TCG_TARGET_HAS_ext8u_i32;
1489    case INDEX_op_ext16u_i32:
1490        return TCG_TARGET_HAS_ext16u_i32;
1491    case INDEX_op_bswap16_i32:
1492        return TCG_TARGET_HAS_bswap16_i32;
1493    case INDEX_op_bswap32_i32:
1494        return TCG_TARGET_HAS_bswap32_i32;
1495    case INDEX_op_not_i32:
1496        return TCG_TARGET_HAS_not_i32;
1497    case INDEX_op_neg_i32:
1498        return TCG_TARGET_HAS_neg_i32;
1499    case INDEX_op_andc_i32:
1500        return TCG_TARGET_HAS_andc_i32;
1501    case INDEX_op_orc_i32:
1502        return TCG_TARGET_HAS_orc_i32;
1503    case INDEX_op_eqv_i32:
1504        return TCG_TARGET_HAS_eqv_i32;
1505    case INDEX_op_nand_i32:
1506        return TCG_TARGET_HAS_nand_i32;
1507    case INDEX_op_nor_i32:
1508        return TCG_TARGET_HAS_nor_i32;
1509    case INDEX_op_clz_i32:
1510        return TCG_TARGET_HAS_clz_i32;
1511    case INDEX_op_ctz_i32:
1512        return TCG_TARGET_HAS_ctz_i32;
1513    case INDEX_op_ctpop_i32:
1514        return TCG_TARGET_HAS_ctpop_i32;
1515
1516    case INDEX_op_brcond2_i32:
1517    case INDEX_op_setcond2_i32:
1518        return TCG_TARGET_REG_BITS == 32;
1519
1520    case INDEX_op_mov_i64:
1521    case INDEX_op_movi_i64:
1522    case INDEX_op_setcond_i64:
1523    case INDEX_op_brcond_i64:
1524    case INDEX_op_ld8u_i64:
1525    case INDEX_op_ld8s_i64:
1526    case INDEX_op_ld16u_i64:
1527    case INDEX_op_ld16s_i64:
1528    case INDEX_op_ld32u_i64:
1529    case INDEX_op_ld32s_i64:
1530    case INDEX_op_ld_i64:
1531    case INDEX_op_st8_i64:
1532    case INDEX_op_st16_i64:
1533    case INDEX_op_st32_i64:
1534    case INDEX_op_st_i64:
1535    case INDEX_op_add_i64:
1536    case INDEX_op_sub_i64:
1537    case INDEX_op_mul_i64:
1538    case INDEX_op_and_i64:
1539    case INDEX_op_or_i64:
1540    case INDEX_op_xor_i64:
1541    case INDEX_op_shl_i64:
1542    case INDEX_op_shr_i64:
1543    case INDEX_op_sar_i64:
1544    case INDEX_op_ext_i32_i64:
1545    case INDEX_op_extu_i32_i64:
1546        return TCG_TARGET_REG_BITS == 64;
1547
1548    case INDEX_op_movcond_i64:
1549        return TCG_TARGET_HAS_movcond_i64;
1550    case INDEX_op_div_i64:
1551    case INDEX_op_divu_i64:
1552        return TCG_TARGET_HAS_div_i64;
1553    case INDEX_op_rem_i64:
1554    case INDEX_op_remu_i64:
1555        return TCG_TARGET_HAS_rem_i64;
1556    case INDEX_op_div2_i64:
1557    case INDEX_op_divu2_i64:
1558        return TCG_TARGET_HAS_div2_i64;
1559    case INDEX_op_rotl_i64:
1560    case INDEX_op_rotr_i64:
1561        return TCG_TARGET_HAS_rot_i64;
1562    case INDEX_op_deposit_i64:
1563        return TCG_TARGET_HAS_deposit_i64;
1564    case INDEX_op_extract_i64:
1565        return TCG_TARGET_HAS_extract_i64;
1566    case INDEX_op_sextract_i64:
1567        return TCG_TARGET_HAS_sextract_i64;
1568    case INDEX_op_extract2_i64:
1569        return TCG_TARGET_HAS_extract2_i64;
1570    case INDEX_op_extrl_i64_i32:
1571        return TCG_TARGET_HAS_extrl_i64_i32;
1572    case INDEX_op_extrh_i64_i32:
1573        return TCG_TARGET_HAS_extrh_i64_i32;
1574    case INDEX_op_ext8s_i64:
1575        return TCG_TARGET_HAS_ext8s_i64;
1576    case INDEX_op_ext16s_i64:
1577        return TCG_TARGET_HAS_ext16s_i64;
1578    case INDEX_op_ext32s_i64:
1579        return TCG_TARGET_HAS_ext32s_i64;
1580    case INDEX_op_ext8u_i64:
1581        return TCG_TARGET_HAS_ext8u_i64;
1582    case INDEX_op_ext16u_i64:
1583        return TCG_TARGET_HAS_ext16u_i64;
1584    case INDEX_op_ext32u_i64:
1585        return TCG_TARGET_HAS_ext32u_i64;
1586    case INDEX_op_bswap16_i64:
1587        return TCG_TARGET_HAS_bswap16_i64;
1588    case INDEX_op_bswap32_i64:
1589        return TCG_TARGET_HAS_bswap32_i64;
1590    case INDEX_op_bswap64_i64:
1591        return TCG_TARGET_HAS_bswap64_i64;
1592    case INDEX_op_not_i64:
1593        return TCG_TARGET_HAS_not_i64;
1594    case INDEX_op_neg_i64:
1595        return TCG_TARGET_HAS_neg_i64;
1596    case INDEX_op_andc_i64:
1597        return TCG_TARGET_HAS_andc_i64;
1598    case INDEX_op_orc_i64:
1599        return TCG_TARGET_HAS_orc_i64;
1600    case INDEX_op_eqv_i64:
1601        return TCG_TARGET_HAS_eqv_i64;
1602    case INDEX_op_nand_i64:
1603        return TCG_TARGET_HAS_nand_i64;
1604    case INDEX_op_nor_i64:
1605        return TCG_TARGET_HAS_nor_i64;
1606    case INDEX_op_clz_i64:
1607        return TCG_TARGET_HAS_clz_i64;
1608    case INDEX_op_ctz_i64:
1609        return TCG_TARGET_HAS_ctz_i64;
1610    case INDEX_op_ctpop_i64:
1611        return TCG_TARGET_HAS_ctpop_i64;
1612    case INDEX_op_add2_i64:
1613        return TCG_TARGET_HAS_add2_i64;
1614    case INDEX_op_sub2_i64:
1615        return TCG_TARGET_HAS_sub2_i64;
1616    case INDEX_op_mulu2_i64:
1617        return TCG_TARGET_HAS_mulu2_i64;
1618    case INDEX_op_muls2_i64:
1619        return TCG_TARGET_HAS_muls2_i64;
1620    case INDEX_op_muluh_i64:
1621        return TCG_TARGET_HAS_muluh_i64;
1622    case INDEX_op_mulsh_i64:
1623        return TCG_TARGET_HAS_mulsh_i64;
1624
1625    case INDEX_op_mov_vec:
1626    case INDEX_op_dup_vec:
1627    case INDEX_op_dupi_vec:
1628    case INDEX_op_dupm_vec:
1629    case INDEX_op_ld_vec:
1630    case INDEX_op_st_vec:
1631    case INDEX_op_add_vec:
1632    case INDEX_op_sub_vec:
1633    case INDEX_op_and_vec:
1634    case INDEX_op_or_vec:
1635    case INDEX_op_xor_vec:
1636    case INDEX_op_cmp_vec:
1637        return have_vec;
1638    case INDEX_op_dup2_vec:
1639        return have_vec && TCG_TARGET_REG_BITS == 32;
1640    case INDEX_op_not_vec:
1641        return have_vec && TCG_TARGET_HAS_not_vec;
1642    case INDEX_op_neg_vec:
1643        return have_vec && TCG_TARGET_HAS_neg_vec;
1644    case INDEX_op_abs_vec:
1645        return have_vec && TCG_TARGET_HAS_abs_vec;
1646    case INDEX_op_andc_vec:
1647        return have_vec && TCG_TARGET_HAS_andc_vec;
1648    case INDEX_op_orc_vec:
1649        return have_vec && TCG_TARGET_HAS_orc_vec;
1650    case INDEX_op_mul_vec:
1651        return have_vec && TCG_TARGET_HAS_mul_vec;
1652    case INDEX_op_shli_vec:
1653    case INDEX_op_shri_vec:
1654    case INDEX_op_sari_vec:
1655        return have_vec && TCG_TARGET_HAS_shi_vec;
1656    case INDEX_op_shls_vec:
1657    case INDEX_op_shrs_vec:
1658    case INDEX_op_sars_vec:
1659        return have_vec && TCG_TARGET_HAS_shs_vec;
1660    case INDEX_op_shlv_vec:
1661    case INDEX_op_shrv_vec:
1662    case INDEX_op_sarv_vec:
1663        return have_vec && TCG_TARGET_HAS_shv_vec;
1664    case INDEX_op_ssadd_vec:
1665    case INDEX_op_usadd_vec:
1666    case INDEX_op_sssub_vec:
1667    case INDEX_op_ussub_vec:
1668        return have_vec && TCG_TARGET_HAS_sat_vec;
1669    case INDEX_op_smin_vec:
1670    case INDEX_op_umin_vec:
1671    case INDEX_op_smax_vec:
1672    case INDEX_op_umax_vec:
1673        return have_vec && TCG_TARGET_HAS_minmax_vec;
1674    case INDEX_op_bitsel_vec:
1675        return have_vec && TCG_TARGET_HAS_bitsel_vec;
1676    case INDEX_op_cmpsel_vec:
1677        return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1678
1679    default:
1680        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1681        return true;
1682    }
1683}
1684
1685/* Note: we convert the 64 bit args to 32 bit and do some alignment
1686   and endian swap. Maybe it would be better to do the alignment
1687   and endian swap in tcg_reg_alloc_call(). */
1688void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1689{
1690    int i, real_args, nb_rets, pi;
1691    unsigned sizemask, flags;
1692    TCGHelperInfo *info;
1693    TCGOp *op;
1694
1695    info = g_hash_table_lookup(helper_table, (gpointer)func);
1696    flags = info->flags;
1697    sizemask = info->sizemask;
1698
1699#ifdef CONFIG_PLUGIN
1700    /* detect non-plugin helpers */
1701    if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1702        tcg_ctx->plugin_insn->calls_helpers = true;
1703    }
1704#endif
1705
1706#if defined(__sparc__) && !defined(__arch64__) \
1707    && !defined(CONFIG_TCG_INTERPRETER)
1708    /* We have 64-bit values in one register, but need to pass as two
1709       separate parameters.  Split them.  */
1710    int orig_sizemask = sizemask;
1711    int orig_nargs = nargs;
1712    TCGv_i64 retl, reth;
1713    TCGTemp *split_args[MAX_OPC_PARAM];
1714
1715    retl = NULL;
1716    reth = NULL;
1717    if (sizemask != 0) {
1718        for (i = real_args = 0; i < nargs; ++i) {
1719            int is_64bit = sizemask & (1 << (i+1)*2);
1720            if (is_64bit) {
1721                TCGv_i64 orig = temp_tcgv_i64(args[i]);
1722                TCGv_i32 h = tcg_temp_new_i32();
1723                TCGv_i32 l = tcg_temp_new_i32();
1724                tcg_gen_extr_i64_i32(l, h, orig);
1725                split_args[real_args++] = tcgv_i32_temp(h);
1726                split_args[real_args++] = tcgv_i32_temp(l);
1727            } else {
1728                split_args[real_args++] = args[i];
1729            }
1730        }
1731        nargs = real_args;
1732        args = split_args;
1733        sizemask = 0;
1734    }
1735#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1736    for (i = 0; i < nargs; ++i) {
1737        int is_64bit = sizemask & (1 << (i+1)*2);
1738        int is_signed = sizemask & (2 << (i+1)*2);
1739        if (!is_64bit) {
1740            TCGv_i64 temp = tcg_temp_new_i64();
1741            TCGv_i64 orig = temp_tcgv_i64(args[i]);
1742            if (is_signed) {
1743                tcg_gen_ext32s_i64(temp, orig);
1744            } else {
1745                tcg_gen_ext32u_i64(temp, orig);
1746            }
1747            args[i] = tcgv_i64_temp(temp);
1748        }
1749    }
1750#endif /* TCG_TARGET_EXTEND_ARGS */
1751
1752    op = tcg_emit_op(INDEX_op_call);
1753
1754    pi = 0;
1755    if (ret != NULL) {
1756#if defined(__sparc__) && !defined(__arch64__) \
1757    && !defined(CONFIG_TCG_INTERPRETER)
1758        if (orig_sizemask & 1) {
1759            /* The 32-bit ABI is going to return the 64-bit value in
1760               the %o0/%o1 register pair.  Prepare for this by using
1761               two return temporaries, and reassemble below.  */
1762            retl = tcg_temp_new_i64();
1763            reth = tcg_temp_new_i64();
1764            op->args[pi++] = tcgv_i64_arg(reth);
1765            op->args[pi++] = tcgv_i64_arg(retl);
1766            nb_rets = 2;
1767        } else {
1768            op->args[pi++] = temp_arg(ret);
1769            nb_rets = 1;
1770        }
1771#else
1772        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1773#ifdef HOST_WORDS_BIGENDIAN
1774            op->args[pi++] = temp_arg(ret + 1);
1775            op->args[pi++] = temp_arg(ret);
1776#else
1777            op->args[pi++] = temp_arg(ret);
1778            op->args[pi++] = temp_arg(ret + 1);
1779#endif
1780            nb_rets = 2;
1781        } else {
1782            op->args[pi++] = temp_arg(ret);
1783            nb_rets = 1;
1784        }
1785#endif
1786    } else {
1787        nb_rets = 0;
1788    }
1789    TCGOP_CALLO(op) = nb_rets;
1790
1791    real_args = 0;
1792    for (i = 0; i < nargs; i++) {
1793        int is_64bit = sizemask & (1 << (i+1)*2);
1794        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1795#ifdef TCG_TARGET_CALL_ALIGN_ARGS
1796            /* some targets want aligned 64-bit args */
1797            if (real_args & 1) {
1798                op->args[pi++] = TCG_CALL_DUMMY_ARG;
1799                real_args++;
1800            }
1801#endif
1802           /* If stack grows up, then we will be placing successive
1803              arguments at lower addresses, which means we need to
1804              reverse the order compared to how we would normally
1805              treat either big or little-endian.  For those arguments
1806              that will wind up in registers, this still works for
1807              HPPA (the only current STACK_GROWSUP target) since the
1808              argument registers are *also* allocated in decreasing
1809              order.  If another such target is added, this logic may
1810              have to get more complicated to differentiate between
1811              stack arguments and register arguments.  */
1812#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1813            op->args[pi++] = temp_arg(args[i] + 1);
1814            op->args[pi++] = temp_arg(args[i]);
1815#else
1816            op->args[pi++] = temp_arg(args[i]);
1817            op->args[pi++] = temp_arg(args[i] + 1);
1818#endif
1819            real_args += 2;
1820            continue;
1821        }
1822
1823        op->args[pi++] = temp_arg(args[i]);
1824        real_args++;
1825    }
1826    op->args[pi++] = (uintptr_t)func;
1827    op->args[pi++] = flags;
1828    TCGOP_CALLI(op) = real_args;
1829
1830    /* Make sure the fields didn't overflow.  */
1831    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1832    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1833
1834#if defined(__sparc__) && !defined(__arch64__) \
1835    && !defined(CONFIG_TCG_INTERPRETER)
1836    /* Free all of the parts we allocated above.  */
1837    for (i = real_args = 0; i < orig_nargs; ++i) {
1838        int is_64bit = orig_sizemask & (1 << (i+1)*2);
1839        if (is_64bit) {
1840            tcg_temp_free_internal(args[real_args++]);
1841            tcg_temp_free_internal(args[real_args++]);
1842        } else {
1843            real_args++;
1844        }
1845    }
1846    if (orig_sizemask & 1) {
1847        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1848           Note that describing these as TCGv_i64 eliminates an unnecessary
1849           zero-extension that tcg_gen_concat_i32_i64 would create.  */
1850        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1851        tcg_temp_free_i64(retl);
1852        tcg_temp_free_i64(reth);
1853    }
1854#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1855    for (i = 0; i < nargs; ++i) {
1856        int is_64bit = sizemask & (1 << (i+1)*2);
1857        if (!is_64bit) {
1858            tcg_temp_free_internal(args[i]);
1859        }
1860    }
1861#endif /* TCG_TARGET_EXTEND_ARGS */
1862}
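
/*
 * Editorial sketch of the sizemask layout assumed by the bit tests
 * above (an inference from this file, not a restatement of a header):
 * bit 0 is set when the return value is 64-bit; for argument i,
 * bit (i+1)*2 marks a 64-bit argument and bit (i+1)*2 + 1 a signed
 * one.  For a hypothetical helper returning i64 and taking (s32, i64):
 *
 *     sizemask = 1                  ret is 64-bit
 *              | (2 << (0+1)*2)     arg 0 is signed 32-bit
 *              | (1 << (1+1)*2)     arg 1 is 64-bit
 *              = 0x19
 */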
1863
1864static void tcg_reg_alloc_start(TCGContext *s)
1865{
1866    int i, n;
1867    TCGTemp *ts;
1868
1869    for (i = 0, n = s->nb_globals; i < n; i++) {
1870        ts = &s->temps[i];
1871        ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1872    }
1873    for (n = s->nb_temps; i < n; i++) {
1874        ts = &s->temps[i];
1875        ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1876        ts->mem_allocated = 0;
1877        ts->fixed_reg = 0;
1878    }
1879
1880    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1881}
1882
1883static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1884                                 TCGTemp *ts)
1885{
1886    int idx = temp_idx(ts);
1887
1888    if (ts->temp_global) {
1889        pstrcpy(buf, buf_size, ts->name);
1890    } else if (ts->temp_local) {
1891        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1892    } else {
1893        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1894    }
1895    return buf;
1896}
1897
1898static char *tcg_get_arg_str(TCGContext *s, char *buf,
1899                             int buf_size, TCGArg arg)
1900{
1901    return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1902}
1903
1904/* Find helper name.  */
1905static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1906{
1907    const char *ret = NULL;
1908    if (helper_table) {
1909        TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1910        if (info) {
1911            ret = info->name;
1912        }
1913    }
1914    return ret;
1915}
1916
1917static const char * const cond_name[] =
1918{
1919    [TCG_COND_NEVER] = "never",
1920    [TCG_COND_ALWAYS] = "always",
1921    [TCG_COND_EQ] = "eq",
1922    [TCG_COND_NE] = "ne",
1923    [TCG_COND_LT] = "lt",
1924    [TCG_COND_GE] = "ge",
1925    [TCG_COND_LE] = "le",
1926    [TCG_COND_GT] = "gt",
1927    [TCG_COND_LTU] = "ltu",
1928    [TCG_COND_GEU] = "geu",
1929    [TCG_COND_LEU] = "leu",
1930    [TCG_COND_GTU] = "gtu"
1931};
1932
1933static const char * const ldst_name[] =
1934{
1935    [MO_UB]   = "ub",
1936    [MO_SB]   = "sb",
1937    [MO_LEUW] = "leuw",
1938    [MO_LESW] = "lesw",
1939    [MO_LEUL] = "leul",
1940    [MO_LESL] = "lesl",
1941    [MO_LEQ]  = "leq",
1942    [MO_BEUW] = "beuw",
1943    [MO_BESW] = "besw",
1944    [MO_BEUL] = "beul",
1945    [MO_BESL] = "besl",
1946    [MO_BEQ]  = "beq",
1947};
1948
1949static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1950#ifdef TARGET_ALIGNED_ONLY
1951    [MO_UNALN >> MO_ASHIFT]    = "un+",
1952    [MO_ALIGN >> MO_ASHIFT]    = "",
1953#else
1954    [MO_UNALN >> MO_ASHIFT]    = "",
1955    [MO_ALIGN >> MO_ASHIFT]    = "al+",
1956#endif
1957    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1958    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1959    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1960    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1961    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1962    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1963};
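
/*
 * Editorial example: tcg_dump_ops() below concatenates these tables,
 * so a 4-byte-aligned little-endian unsigned 32-bit load in mmu index
 * 1 is printed as ",al4+leul,1"; with no alignment bits set (and
 * TARGET_ALIGNED_ONLY undefined) it would be just ",leul,1".
 */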
1964
1965static inline bool tcg_regset_single(TCGRegSet d)
1966{
1967    return (d & (d - 1)) == 0;
1968}
1969
1970static inline TCGReg tcg_regset_first(TCGRegSet d)
1971{
1972    if (TCG_TARGET_NB_REGS <= 32) {
1973        return ctz32(d);
1974    } else {
1975        return ctz64(d);
1976    }
1977}
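
/*
 * Editorial note: for d == 0x4, tcg_regset_single(d) is true and
 * tcg_regset_first(d) == 2.  An empty set is also reported as
 * "single", so callers must check for zero before asking for the
 * first register.
 */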
1978
1979static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1980{
1981    char buf[128];
1982    TCGOp *op;
1983
1984    QTAILQ_FOREACH(op, &s->ops, link) {
1985        int i, k, nb_oargs, nb_iargs, nb_cargs;
1986        const TCGOpDef *def;
1987        TCGOpcode c;
1988        int col = 0;
1989
1990        c = op->opc;
1991        def = &tcg_op_defs[c];
1992
1993        if (c == INDEX_op_insn_start) {
1994            nb_oargs = 0;
1995            col += qemu_log("\n ----");
1996
1997            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1998                target_ulong a;
1999#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2000                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2001#else
2002                a = op->args[i];
2003#endif
2004                col += qemu_log(" " TARGET_FMT_lx, a);
2005            }
2006        } else if (c == INDEX_op_call) {
2007            /* variable number of arguments */
2008            nb_oargs = TCGOP_CALLO(op);
2009            nb_iargs = TCGOP_CALLI(op);
2010            nb_cargs = def->nb_cargs;
2011
2012            /* function name, flags, out args */
2013            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2014                            tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2015                            op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2016            for (i = 0; i < nb_oargs; i++) {
2017                col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2018                                                       op->args[i]));
2019            }
2020            for (i = 0; i < nb_iargs; i++) {
2021                TCGArg arg = op->args[nb_oargs + i];
2022                const char *t = "<dummy>";
2023                if (arg != TCG_CALL_DUMMY_ARG) {
2024                    t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2025                }
2026                col += qemu_log(",%s", t);
2027            }
2028        } else {
2029            col += qemu_log(" %s ", def->name);
2030
2031            nb_oargs = def->nb_oargs;
2032            nb_iargs = def->nb_iargs;
2033            nb_cargs = def->nb_cargs;
2034
2035            if (def->flags & TCG_OPF_VECTOR) {
2036                col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2037                                8 << TCGOP_VECE(op));
2038            }
2039
2040            k = 0;
2041            for (i = 0; i < nb_oargs; i++) {
2042                if (k != 0) {
2043                    col += qemu_log(",");
2044                }
2045                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2046                                                      op->args[k++]));
2047            }
2048            for (i = 0; i < nb_iargs; i++) {
2049                if (k != 0) {
2050                    col += qemu_log(",");
2051                }
2052                col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2053                                                      op->args[k++]));
2054            }
2055            switch (c) {
2056            case INDEX_op_brcond_i32:
2057            case INDEX_op_setcond_i32:
2058            case INDEX_op_movcond_i32:
2059            case INDEX_op_brcond2_i32:
2060            case INDEX_op_setcond2_i32:
2061            case INDEX_op_brcond_i64:
2062            case INDEX_op_setcond_i64:
2063            case INDEX_op_movcond_i64:
2064            case INDEX_op_cmp_vec:
2065            case INDEX_op_cmpsel_vec:
2066                if (op->args[k] < ARRAY_SIZE(cond_name)
2067                    && cond_name[op->args[k]]) {
2068                    col += qemu_log(",%s", cond_name[op->args[k++]]);
2069                } else {
2070                    col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2071                }
2072                i = 1;
2073                break;
2074            case INDEX_op_qemu_ld_i32:
2075            case INDEX_op_qemu_st_i32:
2076            case INDEX_op_qemu_ld_i64:
2077            case INDEX_op_qemu_st_i64:
2078                {
2079                    TCGMemOpIdx oi = op->args[k++];
2080                    /* distinct name, so as not to shadow TCGOp *op */
2081                    MemOp mop = get_memop(oi);
2082                    unsigned ix = get_mmuidx(oi);
2083
2084                    if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2085                        col += qemu_log(",$0x%x,%u", mop, ix);
2086                    } else {
2087                        const char *s_al, *s_op;
2088                        s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2089                        s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2090                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2090                    }
2091                    i = 1;
2092                }
2093                break;
2094            default:
2095                i = 0;
2096                break;
2097            }
2098            switch (c) {
2099            case INDEX_op_set_label:
2100            case INDEX_op_br:
2101            case INDEX_op_brcond_i32:
2102            case INDEX_op_brcond_i64:
2103            case INDEX_op_brcond2_i32:
2104                col += qemu_log("%s$L%d", k ? "," : "",
2105                                arg_label(op->args[k])->id);
2106                i++, k++;
2107                break;
2108            default:
2109                break;
2110            }
2111            for (; i < nb_cargs; i++, k++) {
2112                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2113            }
2114        }
2115
2116        if (have_prefs || op->life) {
2117            for (; col < 40; ++col) {
2118                putc(' ', qemu_logfile);
2119            }
2120        }
2121
2122        if (op->life) {
2123            unsigned life = op->life;
2124
2125            if (life & (SYNC_ARG * 3)) {
2126                qemu_log("  sync:");
2127                for (i = 0; i < 2; ++i) {
2128                    if (life & (SYNC_ARG << i)) {
2129                        qemu_log(" %d", i);
2130                    }
2131                }
2132            }
2133            life /= DEAD_ARG;
2134            if (life) {
2135                qemu_log("  dead:");
2136                for (i = 0; life; ++i, life >>= 1) {
2137                    if (life & 1) {
2138                        qemu_log(" %d", i);
2139                    }
2140                }
2141            }
2142        }
2143
2144        if (have_prefs) {
2145            for (i = 0; i < nb_oargs; ++i) {
2146                TCGRegSet set = op->output_pref[i];
2147
2148                if (i == 0) {
2149                    qemu_log("  pref=");
2150                } else {
2151                    qemu_log(",");
2152                }
2153                if (set == 0) {
2154                    qemu_log("none");
2155                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2156                    qemu_log("all");
2157#ifdef CONFIG_DEBUG_TCG
2158                } else if (tcg_regset_single(set)) {
2159                    TCGReg reg = tcg_regset_first(set);
2160                    qemu_log("%s", tcg_target_reg_names[reg]);
2161#endif
2162                } else if (TCG_TARGET_NB_REGS <= 32) {
2163                    qemu_log("%#x", (uint32_t)set);
2164                } else {
2165                    qemu_log("%#" PRIx64, (uint64_t)set);
2166                }
2167            }
2168        }
2169
2170        qemu_log("\n");
2171    }
2172}
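
/*
 * Editorial example of the dump format produced above (register names
 * depend on the host; the pref column appears only when dumping with
 * preferences):
 *
 *     add_i32 tmp2,tmp0,tmp1                  dead: 1 2  pref=all
 *     brcond_i32 tmp2,loc0,lt,$L0             dead: 0 1
 */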
2173
2174/* we give more priority to constraints with fewer registers */
2175static int get_constraint_priority(const TCGOpDef *def, int k)
2176{
2177    const TCGArgConstraint *arg_ct;
2178
2179    int i, n;
2180    arg_ct = &def->args_ct[k];
2181    if (arg_ct->ct & TCG_CT_ALIAS) {
2182        /* an alias is equivalent to a single register */
2183        n = 1;
2184    } else {
2185        if (!(arg_ct->ct & TCG_CT_REG))
2186            return 0;
2187        n = 0;
2188        for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2189            if (tcg_regset_test_reg(arg_ct->u.regs, i))
2190                n++;
2191        }
2192    }
2193    return TCG_TARGET_NB_REGS - n + 1;
2194}
2195
2196/* sort from highest priority to lowest */
2197static void sort_constraints(TCGOpDef *def, int start, int n)
2198{
2199    int i, j, p1, p2, tmp;
2200
2201    for(i = 0; i < n; i++)
2202        def->sorted_args[start + i] = start + i;
2203    if (n <= 1)
2204        return;
2205    for(i = 0; i < n - 1; i++) {
2206        for(j = i + 1; j < n; j++) {
2207            p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2208            p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2209            if (p1 < p2) {
2210                tmp = def->sorted_args[start + i];
2211                def->sorted_args[start + i] = def->sorted_args[start + j];
2212                def->sorted_args[start + j] = tmp;
2213            }
2214        }
2215    }
2216}
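
/*
 * Editorial example: with the priorities above, an argument tied to a
 * single register, or aliased to an output (which counts as a single
 * register), sorts ahead of a general "any register" constraint, so
 * the tightest constraints get first pick during allocation.
 */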
2217
2218static void process_op_defs(TCGContext *s)
2219{
2220    TCGOpcode op;
2221
2222    for (op = 0; op < NB_OPS; op++) {
2223        TCGOpDef *def = &tcg_op_defs[op];
2224        const TCGTargetOpDef *tdefs;
2225        TCGType type;
2226        int i, nb_args;
2227
2228        if (def->flags & TCG_OPF_NOT_PRESENT) {
2229            continue;
2230        }
2231
2232        nb_args = def->nb_iargs + def->nb_oargs;
2233        if (nb_args == 0) {
2234            continue;
2235        }
2236
2237        tdefs = tcg_target_op_def(op);
2238        /* Missing TCGTargetOpDef entry. */
2239        tcg_debug_assert(tdefs != NULL);
2240
2241        type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2242        for (i = 0; i < nb_args; i++) {
2243            const char *ct_str = tdefs->args_ct_str[i];
2244            /* Incomplete TCGTargetOpDef entry. */
2245            tcg_debug_assert(ct_str != NULL);
2246
2247            def->args_ct[i].u.regs = 0;
2248            def->args_ct[i].ct = 0;
2249            while (*ct_str != '\0') {
2250                switch(*ct_str) {
2251                case '0' ... '9':
2252                    {
2253                        int oarg = *ct_str - '0';
2254                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2255                        tcg_debug_assert(oarg < def->nb_oargs);
2256                        tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2257                        /* TCG_CT_ALIAS is for the output arguments.
2258                           The input is tagged with TCG_CT_IALIAS. */
2259                        def->args_ct[i] = def->args_ct[oarg];
2260                        def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2261                        def->args_ct[oarg].alias_index = i;
2262                        def->args_ct[i].ct |= TCG_CT_IALIAS;
2263                        def->args_ct[i].alias_index = oarg;
2264                    }
2265                    ct_str++;
2266                    break;
2267                case '&':
2268                    def->args_ct[i].ct |= TCG_CT_NEWREG;
2269                    ct_str++;
2270                    break;
2271                case 'i':
2272                    def->args_ct[i].ct |= TCG_CT_CONST;
2273                    ct_str++;
2274                    break;
2275                default:
2276                    ct_str = target_parse_constraint(&def->args_ct[i],
2277                                                     ct_str, type);
2278                    /* Typo in TCGTargetOpDef constraint. */
2279                    tcg_debug_assert(ct_str != NULL);
2280                }
2281            }
2282        }
2283
2284        /* TCGTargetOpDef entry with too much information? */
2285        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2286
2287        /* sort the constraints (XXX: this is just a heuristic) */
2288        sort_constraints(def, 0, def->nb_oargs);
2289        sort_constraints(def, def->nb_oargs, def->nb_iargs);
2290    }
2291}
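
/*
 * Illustrative backend entry for the parser above (hypothetical, not
 * quoted from a real tcg-target.inc.c):
 *
 *     { INDEX_op_add_i32, { "r", "0", "ri" } }
 *
 * declares one output in any register, a first input aliased to output
 * 0 (the '0'..'9' case, which sets TCG_CT_ALIAS/TCG_CT_IALIAS) and a
 * second input that may be a register or an immediate (the 'i' case);
 * letters such as 'r' are handed to target_parse_constraint().
 */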
2292
2293void tcg_op_remove(TCGContext *s, TCGOp *op)
2294{
2295    TCGLabel *label;
2296
2297    switch (op->opc) {
2298    case INDEX_op_br:
2299        label = arg_label(op->args[0]);
2300        label->refs--;
2301        break;
2302    case INDEX_op_brcond_i32:
2303    case INDEX_op_brcond_i64:
2304        label = arg_label(op->args[3]);
2305        label->refs--;
2306        break;
2307    case INDEX_op_brcond2_i32:
2308        label = arg_label(op->args[5]);
2309        label->refs--;
2310        break;
2311    default:
2312        break;
2313    }
2314
2315    QTAILQ_REMOVE(&s->ops, op, link);
2316    QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2317    s->nb_ops--;
2318
2319#ifdef CONFIG_PROFILER
2320    atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2321#endif
2322}
2323
2324static TCGOp *tcg_op_alloc(TCGOpcode opc)
2325{
2326    TCGContext *s = tcg_ctx;
2327    TCGOp *op;
2328
2329    if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2330        op = tcg_malloc(sizeof(TCGOp));
2331    } else {
2332        op = QTAILQ_FIRST(&s->free_ops);
2333        QTAILQ_REMOVE(&s->free_ops, op, link);
2334    }
2335    memset(op, 0, offsetof(TCGOp, link));
2336    op->opc = opc;
2337    s->nb_ops++;
2338
2339    return op;
2340}
2341
2342TCGOp *tcg_emit_op(TCGOpcode opc)
2343{
2344    TCGOp *op = tcg_op_alloc(opc);
2345    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2346    return op;
2347}
2348
2349TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2350{
2351    TCGOp *new_op = tcg_op_alloc(opc);
2352    QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2353    return new_op;
2354}
2355
2356TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2357{
2358    TCGOp *new_op = tcg_op_alloc(opc);
2359    QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2360    return new_op;
2361}
2362
2363/* Reachability analysis: remove unreachable code.  */
2364static void reachable_code_pass(TCGContext *s)
2365{
2366    TCGOp *op, *op_next;
2367    bool dead = false;
2368
2369    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2370        bool remove = dead;
2371        TCGLabel *label;
2372        int call_flags;
2373
2374        switch (op->opc) {
2375        case INDEX_op_set_label:
2376            label = arg_label(op->args[0]);
2377            if (label->refs == 0) {
2378                /*
2379                 * While there is an occasional backward branch, virtually
2380                 * all branches generated by the translators are forward.
2381                 * Which means that, in general, we will already have
2382                 * removed all references to any label that is about to
2383                 * be removed, and there is little to be gained by iterating.
2384                 */
2385                remove = true;
2386            } else {
2387                /* Once we see a label, insns become live again.  */
2388                dead = false;
2389                remove = false;
2390
2391                /*
2392                 * Optimization can fold conditional branches to unconditional.
2393                 * If we find a label with one reference which is preceded by
2394                 * an unconditional branch to it, remove both.  This had to
2395                 * wait until the dead code in between them was removed.
2396                 */
2397                if (label->refs == 1) {
2398                    TCGOp *op_prev = QTAILQ_PREV(op, link);
2399                    if (op_prev->opc == INDEX_op_br &&
2400                        label == arg_label(op_prev->args[0])) {
2401                        tcg_op_remove(s, op_prev);
2402                        remove = true;
2403                    }
2404                }
2405            }
2406            break;
2407
2408        case INDEX_op_br:
2409        case INDEX_op_exit_tb:
2410        case INDEX_op_goto_ptr:
2411            /* Unconditional branches; everything following is dead.  */
2412            dead = true;
2413            break;
2414
2415        case INDEX_op_call:
2416            /* Notice noreturn helper calls, e.g. those raising exceptions.  */
2417            call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2418            if (call_flags & TCG_CALL_NO_RETURN) {
2419                dead = true;
2420            }
2421            break;
2422
2423        case INDEX_op_insn_start:
2424            /* Never remove -- we need to keep these for unwind.  */
2425            remove = false;
2426            break;
2427
2428        default:
2429            break;
2430        }
2431
2432        if (remove) {
2433            tcg_op_remove(s, op);
2434        }
2435    }
2436}
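
/*
 * Worked example (editorial): if the optimizer folded a conditional
 * branch so that the ops read
 *
 *     br $L0
 *     ... unreachable ops ...
 *     set_label $L0
 *
 * the pass above removes the unreachable ops as it visits them; then,
 * if $L0 is left with exactly one reference from the immediately
 * preceding br, both the br and the set_label are removed as well.
 */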
2437
2438#define TS_DEAD  1
2439#define TS_MEM   2
2440
2441#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2442#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2443
2444/* For liveness_pass_1, the register preferences for a given temp.  */
2445static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2446{
2447    return ts->state_ptr;
2448}
2449
2450/* For liveness_pass_1, reset the preferences for a given temp to the
2451 * maximal regset for its type.
2452 */
2453static inline void la_reset_pref(TCGTemp *ts)
2454{
2455    *la_temp_pref(ts)
2456        = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2457}
2458
2459/* liveness analysis: end of function: all temps are dead, and globals
2460   should be in memory. */
2461static void la_func_end(TCGContext *s, int ng, int nt)
2462{
2463    int i;
2464
2465    for (i = 0; i < ng; ++i) {
2466        s->temps[i].state = TS_DEAD | TS_MEM;
2467        la_reset_pref(&s->temps[i]);
2468    }
2469    for (i = ng; i < nt; ++i) {
2470        s->temps[i].state = TS_DEAD;
2471        la_reset_pref(&s->temps[i]);
2472    }
2473}
2474
2475/* liveness analysis: end of basic block: all temps are dead, globals
2476   and local temps should be in memory. */
2477static void la_bb_end(TCGContext *s, int ng, int nt)
2478{
2479    int i;
2480
2481    for (i = 0; i < ng; ++i) {
2482        s->temps[i].state = TS_DEAD | TS_MEM;
2483        la_reset_pref(&s->temps[i]);
2484    }
2485    for (i = ng; i < nt; ++i) {
2486        s->temps[i].state = (s->temps[i].temp_local
2487                             ? TS_DEAD | TS_MEM
2488                             : TS_DEAD);
2489        la_reset_pref(&s->temps[i]);
2490    }
2491}
2492
2493/* liveness analysis: sync globals back to memory.  */
2494static void la_global_sync(TCGContext *s, int ng)
2495{
2496    int i;
2497
2498    for (i = 0; i < ng; ++i) {
2499        int state = s->temps[i].state;
2500        s->temps[i].state = state | TS_MEM;
2501        if (state == TS_DEAD) {
2502            /* If the global was previously dead, reset prefs.  */
2503            la_reset_pref(&s->temps[i]);
2504        }
2505    }
2506}
2507
2508/* liveness analysis: sync globals back to memory and kill.  */
2509static void la_global_kill(TCGContext *s, int ng)
2510{
2511    int i;
2512
2513    for (i = 0; i < ng; i++) {
2514        s->temps[i].state = TS_DEAD | TS_MEM;
2515        la_reset_pref(&s->temps[i]);
2516    }
2517}
2518
2519/* liveness analysis: note live globals crossing calls.  */
2520static void la_cross_call(TCGContext *s, int nt)
2521{
2522    TCGRegSet mask = ~tcg_target_call_clobber_regs;
2523    int i;
2524
2525    for (i = 0; i < nt; i++) {
2526        TCGTemp *ts = &s->temps[i];
2527        if (!(ts->state & TS_DEAD)) {
2528            TCGRegSet *pset = la_temp_pref(ts);
2529            TCGRegSet set = *pset;
2530
2531            set &= mask;
2532            /* If the combination is not possible, restart.  */
2533            if (set == 0) {
2534                set = tcg_target_available_regs[ts->type] & mask;
2535            }
2536            *pset = set;
2537        }
2538    }
2539}
2540
2541/* Liveness analysis: update the opc_arg_life array to tell if a
2542   given input argument is dead. Instructions updating dead
2543   temporaries are removed. */
2544static void liveness_pass_1(TCGContext *s)
2545{
2546    int nb_globals = s->nb_globals;
2547    int nb_temps = s->nb_temps;
2548    TCGOp *op, *op_prev;
2549    TCGRegSet *prefs;
2550    int i;
2551
2552    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2553    for (i = 0; i < nb_temps; ++i) {
2554        s->temps[i].state_ptr = prefs + i;
2555    }
2556
2557    /* ??? Should be redundant with the exit_tb that ends the TB.  */
2558    la_func_end(s, nb_globals, nb_temps);
2559
2560    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2561        int nb_iargs, nb_oargs;
2562        TCGOpcode opc_new, opc_new2;
2563        bool have_opc_new2;
2564        TCGLifeData arg_life = 0;
2565        TCGTemp *ts;
2566        TCGOpcode opc = op->opc;
2567        const TCGOpDef *def = &tcg_op_defs[opc];
2568
2569        switch (opc) {
2570        case INDEX_op_call:
2571            {
2572                int call_flags;
2573                int nb_call_regs;
2574
2575                nb_oargs = TCGOP_CALLO(op);
2576                nb_iargs = TCGOP_CALLI(op);
2577                call_flags = op->args[nb_oargs + nb_iargs + 1];
2578
2579                /* pure functions can be removed if their result is unused */
2580                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2581                    for (i = 0; i < nb_oargs; i++) {
2582                        ts = arg_temp(op->args[i]);
2583                        if (ts->state != TS_DEAD) {
2584                            goto do_not_remove_call;
2585                        }
2586                    }
2587                    goto do_remove;
2588                }
2589            do_not_remove_call:
2590
2591                /* Output args are dead.  */
2592                for (i = 0; i < nb_oargs; i++) {
2593                    ts = arg_temp(op->args[i]);
2594                    if (ts->state & TS_DEAD) {
2595                        arg_life |= DEAD_ARG << i;
2596                    }
2597                    if (ts->state & TS_MEM) {
2598                        arg_life |= SYNC_ARG << i;
2599                    }
2600                    ts->state = TS_DEAD;
2601                    la_reset_pref(ts);
2602
2603                    /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2604                    op->output_pref[i] = 0;
2605                }
2606
2607                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2608                                    TCG_CALL_NO_READ_GLOBALS))) {
2609                    la_global_kill(s, nb_globals);
2610                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2611                    la_global_sync(s, nb_globals);
2612                }
2613
2614                /* Record arguments that die in this helper.  */
2615                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2616                    ts = arg_temp(op->args[i]);
2617                    if (ts && ts->state & TS_DEAD) {
2618                        arg_life |= DEAD_ARG << i;
2619                    }
2620                }
2621
2622                /* For all live registers, remove call-clobbered prefs.  */
2623                la_cross_call(s, nb_temps);
2624
2625                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2626
2627                /* Input arguments are live for preceding opcodes.  */
2628                for (i = 0; i < nb_iargs; i++) {
2629                    ts = arg_temp(op->args[i + nb_oargs]);
2630                    if (ts && ts->state & TS_DEAD) {
2631                        /* For those arguments that die, and will be allocated
2632                         * in registers, clear the register set for that arg,
2633                         * to be filled in below.  For args that will be on
2634                         * the stack, reset to any available reg.
2635                         */
2636                        *la_temp_pref(ts)
2637                            = (i < nb_call_regs ? 0 :
2638                               tcg_target_available_regs[ts->type]);
2639                        ts->state &= ~TS_DEAD;
2640                    }
2641                }
2642
2643                /* For each input argument, add its input register to prefs.
2644                   If a temp is used once, this produces a single set bit.  */
2645                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2646                    ts = arg_temp(op->args[i + nb_oargs]);
2647                    if (ts) {
2648                        tcg_regset_set_reg(*la_temp_pref(ts),
2649                                           tcg_target_call_iarg_regs[i]);
2650                    }
2651                }
2652            }
2653            break;
2654        case INDEX_op_insn_start:
2655            break;
2656        case INDEX_op_discard:
2657            /* mark the temporary as dead */
2658            ts = arg_temp(op->args[0]);
2659            ts->state = TS_DEAD;
2660            la_reset_pref(ts);
2661            break;
2662
2663        case INDEX_op_add2_i32:
2664            opc_new = INDEX_op_add_i32;
2665            goto do_addsub2;
2666        case INDEX_op_sub2_i32:
2667            opc_new = INDEX_op_sub_i32;
2668            goto do_addsub2;
2669        case INDEX_op_add2_i64:
2670            opc_new = INDEX_op_add_i64;
2671            goto do_addsub2;
2672        case INDEX_op_sub2_i64:
2673            opc_new = INDEX_op_sub_i64;
2674        do_addsub2:
2675            nb_iargs = 4;
2676            nb_oargs = 2;
2677            /* Test if the high part of the operation is dead, but not
2678               the low part.  The result can be optimized to a simple
2679               add or sub.  This happens often for x86_64 guest when the
2680               cpu mode is set to 32 bit.  */
2681            if (arg_temp(op->args[1])->state == TS_DEAD) {
2682                if (arg_temp(op->args[0])->state == TS_DEAD) {
2683                    goto do_remove;
2684                }
2685                /* Replace the opcode and adjust the args in place,
2686                   leaving 3 unused args at the end.  */
2687                op->opc = opc = opc_new;
2688                op->args[1] = op->args[2];
2689                op->args[2] = op->args[4];
2690                /* Fall through and mark the single-word operation live.  */
2691                nb_iargs = 2;
2692                nb_oargs = 1;
2693            }
2694            goto do_not_remove;
2695
2696        case INDEX_op_mulu2_i32:
2697            opc_new = INDEX_op_mul_i32;
2698            opc_new2 = INDEX_op_muluh_i32;
2699            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2700            goto do_mul2;
2701        case INDEX_op_muls2_i32:
2702            opc_new = INDEX_op_mul_i32;
2703            opc_new2 = INDEX_op_mulsh_i32;
2704            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2705            goto do_mul2;
2706        case INDEX_op_mulu2_i64:
2707            opc_new = INDEX_op_mul_i64;
2708            opc_new2 = INDEX_op_muluh_i64;
2709            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2710            goto do_mul2;
2711        case INDEX_op_muls2_i64:
2712            opc_new = INDEX_op_mul_i64;
2713            opc_new2 = INDEX_op_mulsh_i64;
2714            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2715            goto do_mul2;
2716        do_mul2:
2717            nb_iargs = 2;
2718            nb_oargs = 2;
2719            if (arg_temp(op->args[1])->state == TS_DEAD) {
2720                if (arg_temp(op->args[0])->state == TS_DEAD) {
2721                    /* Both parts of the operation are dead.  */
2722                    goto do_remove;
2723                }
2724                /* The high part of the operation is dead; generate the low. */
2725                op->opc = opc = opc_new;
2726                op->args[1] = op->args[2];
2727                op->args[2] = op->args[3];
2728            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2729                /* The low part of the operation is dead; generate the high. */
2730                op->opc = opc = opc_new2;
2731                op->args[0] = op->args[1];
2732                op->args[1] = op->args[2];
2733                op->args[2] = op->args[3];
2734            } else {
2735                goto do_not_remove;
2736            }
2737            /* Mark the single-word operation live.  */
2738            nb_oargs = 1;
2739            goto do_not_remove;
2740
2741        default:
2742            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2743            nb_iargs = def->nb_iargs;
2744            nb_oargs = def->nb_oargs;
2745
2746            /* Test if the operation can be removed because all
2747               its outputs are dead. We assume that nb_oargs == 0
2748               implies side effects */
2749            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2750                for (i = 0; i < nb_oargs; i++) {
2751                    if (arg_temp(op->args[i])->state != TS_DEAD) {
2752                        goto do_not_remove;
2753                    }
2754                }
2755                goto do_remove;
2756            }
2757            goto do_not_remove;
2758
2759        do_remove:
2760            tcg_op_remove(s, op);
2761            break;
2762
2763        do_not_remove:
2764            for (i = 0; i < nb_oargs; i++) {
2765                ts = arg_temp(op->args[i]);
2766
2767                /* Remember the preference of the uses that followed.  */
2768                op->output_pref[i] = *la_temp_pref(ts);
2769
2770                /* Output args are dead.  */
2771                if (ts->state & TS_DEAD) {
2772                    arg_life |= DEAD_ARG << i;
2773                }
2774                if (ts->state & TS_MEM) {
2775                    arg_life |= SYNC_ARG << i;
2776                }
2777                ts->state = TS_DEAD;
2778                la_reset_pref(ts);
2779            }
2780
2781            /* If end of basic block, update.  */
2782            if (def->flags & TCG_OPF_BB_EXIT) {
2783                la_func_end(s, nb_globals, nb_temps);
2784            } else if (def->flags & TCG_OPF_BB_END) {
2785                la_bb_end(s, nb_globals, nb_temps);
2786            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2787                la_global_sync(s, nb_globals);
2788                if (def->flags & TCG_OPF_CALL_CLOBBER) {
2789                    la_cross_call(s, nb_temps);
2790                }
2791            }
2792
2793            /* Record arguments that die in this opcode.  */
2794            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2795                ts = arg_temp(op->args[i]);
2796                if (ts->state & TS_DEAD) {
2797                    arg_life |= DEAD_ARG << i;
2798                }
2799            }
2800
2801            /* Input arguments are live for preceding opcodes.  */
2802            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2803                ts = arg_temp(op->args[i]);
2804                if (ts->state & TS_DEAD) {
2805                    /* For operands that were dead, initially allow
2806                       all regs for the type.  */
2807                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2808                    ts->state &= ~TS_DEAD;
2809                }
2810            }
2811
2812            /* Incorporate constraints for this operand.  */
2813            switch (opc) {
2814            case INDEX_op_mov_i32:
2815            case INDEX_op_mov_i64:
2816                /* Note that these are TCG_OPF_NOT_PRESENT and do not
2817                   have proper constraints.  That said, special case
2818                   moves to propagate preferences backward.  */
2819                if (IS_DEAD_ARG(1)) {
2820                    *la_temp_pref(arg_temp(op->args[0]))
2821                        = *la_temp_pref(arg_temp(op->args[1]));
2822                }
2823                break;
2824
2825            default:
2826                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2827                    const TCGArgConstraint *ct = &def->args_ct[i];
2828                    TCGRegSet set, *pset;
2829
2830                    ts = arg_temp(op->args[i]);
2831                    pset = la_temp_pref(ts);
2832                    set = *pset;
2833
2834                    set &= ct->u.regs;
2835                    if (ct->ct & TCG_CT_IALIAS) {
2836                        set &= op->output_pref[ct->alias_index];
2837                    }
2838                    /* If the combination is not possible, restart.  */
2839                    if (set == 0) {
2840                        set = ct->u.regs;
2841                    }
2842                    *pset = set;
2843                }
2844                break;
2845            }
2846            break;
2847        }
2848        op->life = arg_life;
2849    }
2850}
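
/*
 * Worked example (editorial) for the do_addsub2 rewrite above: given
 *
 *     add2_i32 lo,hi,al,ah,bl,bh
 *
 * with 'hi' dead but 'lo' live, the op is rewritten in place to
 *
 *     add_i32 lo,al,bl
 *
 * by shifting args[2] and args[4] down, leaving three args unused.
 */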
2851
2852/* Liveness analysis: Convert indirect regs to direct temporaries.  */
2853static bool liveness_pass_2(TCGContext *s)
2854{
2855    int nb_globals = s->nb_globals;
2856    int nb_temps, i;
2857    bool changes = false;
2858    TCGOp *op, *op_next;
2859
2860    /* Create a temporary for each indirect global.  */
2861    for (i = 0; i < nb_globals; ++i) {
2862        TCGTemp *its = &s->temps[i];
2863        if (its->indirect_reg) {
2864            TCGTemp *dts = tcg_temp_alloc(s);
2865            dts->type = its->type;
2866            dts->base_type = its->base_type;
2867            its->state_ptr = dts;
2868        } else {
2869            its->state_ptr = NULL;
2870        }
2871        /* All globals begin dead.  */
2872        its->state = TS_DEAD;
2873    }
2874    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2875        TCGTemp *its = &s->temps[i];
2876        its->state_ptr = NULL;
2877        its->state = TS_DEAD;
2878    }
2879
2880    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2881        TCGOpcode opc = op->opc;
2882        const TCGOpDef *def = &tcg_op_defs[opc];
2883        TCGLifeData arg_life = op->life;
2884        int nb_iargs, nb_oargs, call_flags;
2885        TCGTemp *arg_ts, *dir_ts;
2886
2887        if (opc == INDEX_op_call) {
2888            nb_oargs = TCGOP_CALLO(op);
2889            nb_iargs = TCGOP_CALLI(op);
2890            call_flags = op->args[nb_oargs + nb_iargs + 1];
2891        } else {
2892            nb_iargs = def->nb_iargs;
2893            nb_oargs = def->nb_oargs;
2894
2895            /* Set flags similarly to what calls require.  */
2896            if (def->flags & TCG_OPF_BB_END) {
2897                /* Like writing globals: save_globals */
2898                call_flags = 0;
2899            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2900                /* Like reading globals: sync_globals */
2901                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2902            } else {
2903                /* No effect on globals.  */
2904                call_flags = (TCG_CALL_NO_READ_GLOBALS |
2905                              TCG_CALL_NO_WRITE_GLOBALS);
2906            }
2907        }
2908
2909        /* Make sure that input arguments are available.  */
2910        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2911            arg_ts = arg_temp(op->args[i]);
2912            if (arg_ts) {
2913                dir_ts = arg_ts->state_ptr;
2914                if (dir_ts && arg_ts->state == TS_DEAD) {
2915                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2916                                      ? INDEX_op_ld_i32
2917                                      : INDEX_op_ld_i64);
2918                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2919
2920                    lop->args[0] = temp_arg(dir_ts);
2921                    lop->args[1] = temp_arg(arg_ts->mem_base);
2922                    lop->args[2] = arg_ts->mem_offset;
2923
2924                    /* Loaded, but synced with memory.  */
2925                    arg_ts->state = TS_MEM;
2926                }
2927            }
2928        }
2929
2930        /* Perform input replacement, and mark inputs that became dead.
2931           No action is required except keeping temp_state up to date
2932           so that we reload when needed.  */
2933        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2934            arg_ts = arg_temp(op->args[i]);
2935            if (arg_ts) {
2936                dir_ts = arg_ts->state_ptr;
2937                if (dir_ts) {
2938                    op->args[i] = temp_arg(dir_ts);
2939                    changes = true;
2940                    if (IS_DEAD_ARG(i)) {
2941                        arg_ts->state = TS_DEAD;
2942                    }
2943                }
2944            }
2945        }
2946
2947        /* Liveness analysis should ensure that the following are
2948           all correct, for call sites and basic block end points.  */
2949        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2950            /* Nothing to do */
2951        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2952            for (i = 0; i < nb_globals; ++i) {
2953                /* Liveness should see that globals are synced back,
2954                   that is, either TS_DEAD or TS_MEM.  */
2955                arg_ts = &s->temps[i];
2956                tcg_debug_assert(arg_ts->state_ptr == 0
2957                                 || arg_ts->state != 0);
2958            }
2959        } else {
2960            for (i = 0; i < nb_globals; ++i) {
2961                /* Liveness should see that globals are saved back,
2962                   that is, TS_DEAD, waiting to be reloaded.  */
2963                arg_ts = &s->temps[i];
2964                tcg_debug_assert(arg_ts->state_ptr == 0
2965                                 || arg_ts->state == TS_DEAD);
2966            }
2967        }
2968
2969        /* Outputs become available.  */
2970        for (i = 0; i < nb_oargs; i++) {
2971            arg_ts = arg_temp(op->args[i]);
2972            dir_ts = arg_ts->state_ptr;
2973            if (!dir_ts) {
2974                continue;
2975            }
2976            op->args[i] = temp_arg(dir_ts);
2977            changes = true;
2978
2979            /* The output is now live and modified.  */
2980            arg_ts->state = 0;
2981
2982            /* Sync outputs upon their last write.  */
2983            if (NEED_SYNC_ARG(i)) {
2984                TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2985                                  ? INDEX_op_st_i32
2986                                  : INDEX_op_st_i64);
2987                TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2988
2989                sop->args[0] = temp_arg(dir_ts);
2990                sop->args[1] = temp_arg(arg_ts->mem_base);
2991                sop->args[2] = arg_ts->mem_offset;
2992
2993                arg_ts->state = TS_MEM;
2994            }
2995            /* Drop outputs that are dead.  */
2996            if (IS_DEAD_ARG(i)) {
2997                arg_ts->state = TS_DEAD;
2998            }
2999        }
3000    }
3001
3002    return changes;
3003}
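
/*
 * Editorial sketch: for an indirect global G with direct temp G'
 * allocated above, a use and a final write such as
 *
 *     add_i32 t0,G,t1
 *     mov_i32 G,t2
 *
 * become
 *
 *     ld_i32 G',base,offset
 *     add_i32 t0,G',t1
 *     mov_i32 G',t2
 *     st_i32 G',base,offset
 *
 * with the load inserted before the first use while G is TS_DEAD, and
 * the store inserted after the last write when NEED_SYNC_ARG is set.
 */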
3004
3005#ifdef CONFIG_DEBUG_TCG
3006static void dump_regs(TCGContext *s)
3007{
3008    TCGTemp *ts;
3009    int i;
3010    char buf[64];
3011
3012    for(i = 0; i < s->nb_temps; i++) {
3013        ts = &s->temps[i];
3014        printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3015        switch(ts->val_type) {
3016        case TEMP_VAL_REG:
3017            printf("%s", tcg_target_reg_names[ts->reg]);
3018            break;
3019        case TEMP_VAL_MEM:
3020            printf("%d(%s)", (int)ts->mem_offset,
3021                   tcg_target_reg_names[ts->mem_base->reg]);
3022            break;
3023        case TEMP_VAL_CONST:
3024            printf("$0x%" TCG_PRIlx, ts->val);
3025            break;
3026        case TEMP_VAL_DEAD:
3027            printf("D");
3028            break;
3029        default:
3030            printf("???");
3031            break;
3032        }
3033        printf("\n");
3034    }
3035
3036    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3037        if (s->reg_to_temp[i] != NULL) {
3038            printf("%s: %s\n", 
3039                   tcg_target_reg_names[i], 
3040                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3041        }
3042    }
3043}
3044
3045static void check_regs(TCGContext *s)
3046{
3047    int reg;
3048    int k;
3049    TCGTemp *ts;
3050    char buf[64];
3051
3052    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3053        ts = s->reg_to_temp[reg];
3054        if (ts != NULL) {
3055            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3056                printf("Inconsistency for register %s:\n", 
3057                       tcg_target_reg_names[reg]);
3058                goto fail;
3059            }
3060        }
3061    }
3062    for (k = 0; k < s->nb_temps; k++) {
3063        ts = &s->temps[k];
3064        if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3065            && s->reg_to_temp[ts->reg] != ts) {
3066            printf("Inconsistency for temp %s:\n",
3067                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3068        fail:
3069            printf("reg state:\n");
3070            dump_regs(s);
3071            tcg_abort();
3072        }
3073    }
3074}
3075#endif
3076
3077static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3078{
3079#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3080    /* Sparc64 stack is accessed with an offset of 2047 */
3081    s->current_frame_offset = (s->current_frame_offset +
3082                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
3083        ~(sizeof(tcg_target_long) - 1);
3084#endif
3085    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3086        s->frame_end) {
3087        tcg_abort();
3088    }
3089    ts->mem_offset = s->current_frame_offset;
3090    ts->mem_base = s->frame_temp;
3091    ts->mem_allocated = 1;
3092    s->current_frame_offset += sizeof(tcg_target_long);
3093}
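
/*
 * Editorial arithmetic check for the rounding above: with an 8-byte
 * tcg_target_long and current_frame_offset == 13, the expression
 * (13 + 8 - 1) & ~7 == 16, i.e. the offset is rounded up to the next
 * multiple of sizeof(tcg_target_long).
 */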
3094
3095static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3096
3097/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3098   mark it free; otherwise mark it dead.  */
3099static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3100{
3101    if (ts->fixed_reg) {
3102        return;
3103    }
3104    if (ts->val_type == TEMP_VAL_REG) {
3105        s->reg_to_temp[ts->reg] = NULL;
3106    }
3107    ts->val_type = (free_or_dead < 0
3108                    || ts->temp_local
3109                    || ts->temp_global
3110                    ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3111}
3112
3113/* Mark a temporary as dead.  */
3114static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3115{
3116    temp_free_or_dead(s, ts, 1);
3117}
3118
3119/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3120   register needs to be allocated to store a constant.  If 'free_or_dead'
3121   is non-zero, subsequently release the temporary; if it is positive, the
3122   temp is dead; if it is negative, the temp is free.  */
3123static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3124                      TCGRegSet preferred_regs, int free_or_dead)
3125{
3126    if (ts->fixed_reg) {
3127        return;
3128    }
3129    if (!ts->mem_coherent) {
3130        if (!ts->mem_allocated) {
3131            temp_allocate_frame(s, ts);
3132        }
3133        switch (ts->val_type) {
3134        case TEMP_VAL_CONST:
3135            /* If we're going to free the temp immediately, then we won't
3136               require it later in a register, so attempt to store the
3137               constant to memory directly.  */
3138            if (free_or_dead
3139                && tcg_out_sti(s, ts->type, ts->val,
3140                               ts->mem_base->reg, ts->mem_offset)) {
3141                break;
3142            }
3143            temp_load(s, ts, tcg_target_available_regs[ts->type],
3144                      allocated_regs, preferred_regs);
3145            /* fallthrough */
3146
3147        case TEMP_VAL_REG:
3148            tcg_out_st(s, ts->type, ts->reg,
3149                       ts->mem_base->reg, ts->mem_offset);
3150            break;
3151
3152        case TEMP_VAL_MEM:
3153            break;
3154
3155        case TEMP_VAL_DEAD:
3156        default:
3157            tcg_abort();
3158        }
3159        ts->mem_coherent = 1;
3160    }
3161    if (free_or_dead) {
3162        temp_free_or_dead(s, ts, free_or_dead);
3163    }
3164}
3165
3166/* free register 'reg' by spilling the corresponding temporary if necessary */
3167static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3168{
3169    TCGTemp *ts = s->reg_to_temp[reg];
3170    if (ts != NULL) {
3171        temp_sync(s, ts, allocated_regs, 0, -1);
3172    }
3173}
3174
3175/**
3176 * tcg_reg_alloc:
3177 * @required_regs: Set of registers in which we must allocate.
3178 * @allocated_regs: Set of registers which must be avoided.
3179 * @preferred_regs: Set of registers we should prefer.
3180 * @rev: True if we search the registers in "indirect" order.
3181 *
3182 * The allocated register must be in @required_regs & ~@allocated_regs,
3183 * but if we can put it in @preferred_regs we may save a move later.
3184 */
3185static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3186                            TCGRegSet allocated_regs,
3187                            TCGRegSet preferred_regs, bool rev)
3188{
3189    int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3190    TCGRegSet reg_ct[2];
3191    const int *order;
3192
3193    reg_ct[1] = required_regs & ~allocated_regs;
3194    tcg_debug_assert(reg_ct[1] != 0);
3195    reg_ct[0] = reg_ct[1] & preferred_regs;
3196
3197    /* Skip the preferred_regs option if it cannot be satisfied,
3198       or if the preference made no difference.  */
3199    f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3200
3201    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3202
3203    /* Try free registers, preferences first.  */
3204    for (j = f; j < 2; j++) {
3205        TCGRegSet set = reg_ct[j];
3206
3207        if (tcg_regset_single(set)) {
3208            /* One register in the set.  */
3209            TCGReg reg = tcg_regset_first(set);
3210            if (s->reg_to_temp[reg] == NULL) {
3211                return reg;
3212            }
3213        } else {
3214            for (i = 0; i < n; i++) {
3215                TCGReg reg = order[i];
3216                if (s->reg_to_temp[reg] == NULL &&
3217                    tcg_regset_test_reg(set, reg)) {
3218                    return reg;
3219                }
3220            }
3221        }
3222    }
3223
3224    /* We must spill something.  */
3225    for (j = f; j < 2; j++) {
3226        TCGRegSet set = reg_ct[j];
3227
3228        if (tcg_regset_single(set)) {
3229            /* One register in the set.  */
3230            TCGReg reg = tcg_regset_first(set);
3231            tcg_reg_free(s, reg, allocated_regs);
3232            return reg;
3233        } else {
3234            for (i = 0; i < n; i++) {
3235                TCGReg reg = order[i];
3236                if (tcg_regset_test_reg(set, reg)) {
3237                    tcg_reg_free(s, reg, allocated_regs);
3238                    return reg;
3239                }
3240            }
3241        }
3242    }
3243
3244    tcg_abort();
3245}
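
/*
 * Editorial note: the two loops above make up to four attempts, in
 * order: a free register from the preferred set, a free register from
 * the full required set, then the same two sets again with spilling,
 * so a temp is only spilled when no acceptable register is free.
 */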
3246
3247/* Make sure the temporary is in a register.  If needed, allocate the register
3248   from DESIRED while avoiding ALLOCATED.  */
3249static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3250                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3251{
3252    TCGReg reg;
3253
3254    switch (ts->val_type) {
3255    case TEMP_VAL_REG:
3256        return;
3257    case TEMP_VAL_CONST:
3258        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3259                            preferred_regs, ts->indirect_base);
3260        tcg_out_movi(s, ts->type, reg, ts->val);
3261        ts->mem_coherent = 0;
3262        break;
3263    case TEMP_VAL_MEM:
3264        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3265                            preferred_regs, ts->indirect_base);
3266        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3267        ts->mem_coherent = 1;
3268        break;
3269    case TEMP_VAL_DEAD:
3270    default:
3271        tcg_abort();
3272    }
3273    ts->reg = reg;
3274    ts->val_type = TEMP_VAL_REG;
3275    s->reg_to_temp[reg] = ts;
3276}
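
/*
 * Example (editor's sketch) of the TEMP_VAL_CONST path above: for a
 * temp with { val_type = TEMP_VAL_CONST, val = 0x1234 }, temp_load()
 * picks a register and emits the equivalent of
 *
 *     tcg_out_movi(s, ts->type, reg, 0x1234);
 *
 * leaving mem_coherent clear, since the backing slot (if any) was
 * never written with this value.
 */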
3277
3278/* Save a temporary to memory. 'allocated_regs' is used in case a
3279   temporary register needs to be allocated to store a constant.  */
3280static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3281{
3282    /* The liveness analysis already ensures that globals are back
3283       in memory. Keep a tcg_debug_assert for safety. */
3284    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3285}
3286
3287/* save globals to their canonical location and assume they can be
3288   modified by the following code. 'allocated_regs' is used in case a
3289   temporary register needs to be allocated to store a constant. */
3290static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3291{
3292    int i, n;
3293
3294    for (i = 0, n = s->nb_globals; i < n; i++) {
3295        temp_save(s, &s->temps[i], allocated_regs);
3296    }
3297}
3298
3299/* sync globals to their canonical location and assume they can be
3300   read by the following code. 'allocated_regs' is used in case a
3301   temporary register needs to be allocated to store a constant. */
3302static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3303{
3304    int i, n;
3305
3306    for (i = 0, n = s->nb_globals; i < n; i++) {
3307        TCGTemp *ts = &s->temps[i];
3308        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3309                         || ts->fixed_reg
3310                         || ts->mem_coherent);
3311    }
3312}
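
/*
 * Editor's note: "save" (save_globals) forces globals back to their
 * memory slots because the following code may modify them; "sync"
 * (sync_globals) only requires the memory copy to be coherent, so a
 * global may legitimately stay cached in a register across code that
 * merely reads it.
 */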
3313
3314/* at the end of a basic block, we assume all temporaries are dead and
3315   all globals are stored at their canonical location. */
3316static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3317{
3318    int i;
3319
3320    for (i = s->nb_globals; i < s->nb_temps; i++) {
3321        TCGTemp *ts = &s->temps[i];
3322        if (ts->temp_local) {
3323            temp_save(s, ts, allocated_regs);
3324        } else {
3325            /* The liveness analysis already ensures that temps are dead.
3326               Keep a tcg_debug_assert for safety. */
3327            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3328        }
3329    }
3330
3331    save_globals(s, allocated_regs);
3332}
3333
3334/*
3335 * Specialized code generation for INDEX_op_movi_*.
3336 */
3337static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3338                                  tcg_target_ulong val, TCGLifeData arg_life,
3339                                  TCGRegSet preferred_regs)
3340{
3341    /* ENV should not be modified.  */
3342    tcg_debug_assert(!ots->fixed_reg);
3343
3344    /* The movi is not explicitly generated here.  */
3345    if (ots->val_type == TEMP_VAL_REG) {
3346        s->reg_to_temp[ots->reg] = NULL;
3347    }
3348    ots->val_type = TEMP_VAL_CONST;
3349    ots->val = val;
3350    ots->mem_coherent = 0;
3351    if (NEED_SYNC_ARG(0)) {
3352        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3353    } else if (IS_DEAD_ARG(0)) {
3354        temp_dead(s, ots);
3355    }
3356}
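
/*
 * Editor's note: no host instruction is emitted here.  The constant
 * is merely recorded in the TCGTemp, e.g.
 *
 *     movi_i32 t2, $0x1    ->  t2.val_type = TEMP_VAL_CONST, no code
 *
 * and is materialized later: by temp_load() when t2 is consumed, by
 * the temp_sync() call above when a writeback is required, or folded
 * as an immediate by tcg_target_const_match() in tcg_reg_alloc_op().
 */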
3357
3358static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3359{
3360    TCGTemp *ots = arg_temp(op->args[0]);
3361    tcg_target_ulong val = op->args[1];
3362
3363    tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3364}
3365
3366/*
3367 * Specialized code generation for INDEX_op_mov_*.
3368 */
3369static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3370{
3371    const TCGLifeData arg_life = op->life;
3372    TCGRegSet allocated_regs, preferred_regs;
3373    TCGTemp *ts, *ots;
3374    TCGType otype, itype;
3375
3376    allocated_regs = s->reserved_regs;
3377    preferred_regs = op->output_pref[0];
3378    ots = arg_temp(op->args[0]);
3379    ts = arg_temp(op->args[1]);
3380
3381    /* ENV should not be modified.  */
3382    tcg_debug_assert(!ots->fixed_reg);
3383
3384    /* Note that otype != itype for no-op truncation.  */
3385    otype = ots->type;
3386    itype = ts->type;
3387
3388    if (ts->val_type == TEMP_VAL_CONST) {
3389        /* propagate constant or generate sti */
3390        tcg_target_ulong val = ts->val;
3391        if (IS_DEAD_ARG(1)) {
3392            temp_dead(s, ts);
3393        }
3394        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3395        return;
3396    }
3397
3398    /* If the source value is in memory we're going to be forced
3399       to have it in a register in order to perform the copy.  Copy
3400       the SOURCE value into its own register first, that way we
3401       don't have to reload SOURCE the next time it is used. */
3402    if (ts->val_type == TEMP_VAL_MEM) {
3403        temp_load(s, ts, tcg_target_available_regs[itype],
3404                  allocated_regs, preferred_regs);
3405    }
3406
3407    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3408    if (IS_DEAD_ARG(0)) {
3409        /* mov to a non-saved dead register makes no sense (even with
3410           liveness analysis disabled). */
3411        tcg_debug_assert(NEED_SYNC_ARG(0));
3412        if (!ots->mem_allocated) {
3413            temp_allocate_frame(s, ots);
3414        }
3415        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3416        if (IS_DEAD_ARG(1)) {
3417            temp_dead(s, ts);
3418        }
3419        temp_dead(s, ots);
3420    } else {
3421        if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3422            /* the mov can be suppressed */
3423            if (ots->val_type == TEMP_VAL_REG) {
3424                s->reg_to_temp[ots->reg] = NULL;
3425            }
3426            ots->reg = ts->reg;
3427            temp_dead(s, ts);
3428        } else {
3429            if (ots->val_type != TEMP_VAL_REG) {
3430                /* When allocating a new register, make sure to not spill the
3431                   input one. */
3432                tcg_regset_set_reg(allocated_regs, ts->reg);
3433                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3434                                         allocated_regs, preferred_regs,
3435                                         ots->indirect_base);
3436            }
3437            if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3438                /*
3439                 * Cross register class move not supported.
3440                 * Store the source register into the destination slot
3441                 * and leave the destination temp as TEMP_VAL_MEM.
3442                 */
3443                assert(!ots->fixed_reg);
3444                if (!ots->mem_allocated) {
3445                    temp_allocate_frame(s, ots);
3446                }
3447                tcg_out_st(s, ts->type, ts->reg,
3448                           ots->mem_base->reg, ots->mem_offset);
3449                ots->mem_coherent = 1;
3450                temp_free_or_dead(s, ots, -1);
3451                return;
3452            }
3453        }
3454        ots->val_type = TEMP_VAL_REG;
3455        ots->mem_coherent = 0;
3456        s->reg_to_temp[ots->reg] = ots;
3457        if (NEED_SYNC_ARG(0)) {
3458            temp_sync(s, ots, allocated_regs, 0, 0);
3459        }
3460    }
3461}
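
/*
 * Editor's note: when the source dies and is not a fixed register,
 * the mov above degenerates into pure register renaming.  E.g. for
 * "mov_i64 t1, t0" with t0 dead afterwards, no host instruction is
 * emitted at all: t1 simply inherits t0's host register and
 * reg_to_temp[] is repointed.
 */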
3462
3463/*
3464 * Specialized code generation for INDEX_op_dup_vec.
3465 */
3466static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3467{
3468    const TCGLifeData arg_life = op->life;
3469    TCGRegSet dup_out_regs, dup_in_regs;
3470    TCGTemp *its, *ots;
3471    TCGType itype, vtype;
3472    intptr_t endian_fixup;
3473    unsigned vece;
3474    bool ok;
3475
3476    ots = arg_temp(op->args[0]);
3477    its = arg_temp(op->args[1]);
3478
3479    /* ENV should not be modified.  */
3480    tcg_debug_assert(!ots->fixed_reg);
3481
3482    itype = its->type;
3483    vece = TCGOP_VECE(op);
3484    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3485
3486    if (its->val_type == TEMP_VAL_CONST) {
3487        /* Propagate constant via movi -> dupi.  */
3488        tcg_target_ulong val = its->val;
3489        if (IS_DEAD_ARG(1)) {
3490            temp_dead(s, its);
3491        }
3492        tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3493        return;
3494    }
3495
3496    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
3497    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
3498
3499    /* Allocate the output register now.  */
3500    if (ots->val_type != TEMP_VAL_REG) {
3501        TCGRegSet allocated_regs = s->reserved_regs;
3502
3503        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3504            /* Make sure to not spill the input register. */
3505            tcg_regset_set_reg(allocated_regs, its->reg);
3506        }
3507        ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3508                                 op->output_pref[0], ots->indirect_base);
3509        ots->val_type = TEMP_VAL_REG;
3510        ots->mem_coherent = 0;
3511        s->reg_to_temp[ots->reg] = ots;
3512    }
3513
3514    switch (its->val_type) {
3515    case TEMP_VAL_REG:
3516        /*
3517         * The dup constraints must be broad, covering all possible VECE.
3518         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3519         * to fail, indicating that extra moves are required for that case.
3520         */
3521        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3522            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3523                goto done;
3524            }
3525            /* Try again from memory or a vector input register.  */
3526        }
3527        if (!its->mem_coherent) {
3528            /*
3529             * The input register is not synced, and so an extra store
3530             * would be required to use memory.  Attempt an integer-vector
3531             * register move first.  We do not have a TCGRegSet for this.
3532             */
3533            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3534                break;
3535            }
3536            /* Sync the temp back to its slot and load from there.  */
3537            temp_sync(s, its, s->reserved_regs, 0, 0);
3538        }
3539        /* fall through */
3540
3541    case TEMP_VAL_MEM:
3542#ifdef HOST_WORDS_BIGENDIAN
3543        endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3544        endian_fixup -= 1 << vece;
3545#else
3546        endian_fixup = 0;
3547#endif
3548        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3549                             its->mem_offset + endian_fixup)) {
3550            goto done;
3551        }
3552        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3553        break;
3554
3555    default:
3556        g_assert_not_reached();
3557    }
3558
3559    /* We now have a vector input register, so dup must succeed. */
3560    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3561    tcg_debug_assert(ok);
3562
3563 done:
3564    if (IS_DEAD_ARG(1)) {
3565        temp_dead(s, its);
3566    }
3567    if (NEED_SYNC_ARG(0)) {
3568        temp_sync(s, ots, s->reserved_regs, 0, 0);
3569    }
3570    if (IS_DEAD_ARG(0)) {
3571        temp_dead(s, ots);
3572    }
3573}
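
/*
 * Editor's note on the endian_fixup arithmetic above: a big-endian
 * host keeps the first vector element at the high end of the integer
 * slot, so for itype == TCG_TYPE_I32 with 1-byte lanes (vece == 0)
 * the load address is biased by 4 - (1 << 0) = 3 bytes, and for
 * TCG_TYPE_I64 with 2-byte lanes (vece == 1) by 8 - 2 = 6.
 */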
3574
3575static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3576{
3577    const TCGLifeData arg_life = op->life;
3578    const TCGOpDef * const def = &tcg_op_defs[op->opc];
3579    TCGRegSet i_allocated_regs;
3580    TCGRegSet o_allocated_regs;
3581    int i, k, nb_iargs, nb_oargs;
3582    TCGReg reg;
3583    TCGArg arg;
3584    const TCGArgConstraint *arg_ct;
3585    TCGTemp *ts;
3586    TCGArg new_args[TCG_MAX_OP_ARGS];
3587    int const_args[TCG_MAX_OP_ARGS];
3588
3589    nb_oargs = def->nb_oargs;
3590    nb_iargs = def->nb_iargs;
3591
3592    /* copy constants */
3593    memcpy(new_args + nb_oargs + nb_iargs, 
3594           op->args + nb_oargs + nb_iargs,
3595           sizeof(TCGArg) * def->nb_cargs);
3596
3597    i_allocated_regs = s->reserved_regs;
3598    o_allocated_regs = s->reserved_regs;
3599
3600    /* satisfy input constraints */ 
3601    for (k = 0; k < nb_iargs; k++) {
3602        TCGRegSet i_preferred_regs, o_preferred_regs;
3603
3604        i = def->sorted_args[nb_oargs + k];
3605        arg = op->args[i];
3606        arg_ct = &def->args_ct[i];
3607        ts = arg_temp(arg);
3608
3609        if (ts->val_type == TEMP_VAL_CONST
3610            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3611            /* constant is OK for instruction */
3612            const_args[i] = 1;
3613            new_args[i] = ts->val;
3614            continue;
3615        }
3616
3617        i_preferred_regs = o_preferred_regs = 0;
3618        if (arg_ct->ct & TCG_CT_IALIAS) {
3619            o_preferred_regs = op->output_pref[arg_ct->alias_index];
3620            if (ts->fixed_reg) {
3621                /* if fixed register, we must allocate a new register
3622                   if the alias is not the same register */
3623                if (arg != op->args[arg_ct->alias_index]) {
3624                    goto allocate_in_reg;
3625                }
3626            } else {
3627                /* if the input is aliased to an output and if it is
3628                   not dead after the instruction, we must allocate
3629                   a new register and move it */
3630                if (!IS_DEAD_ARG(i)) {
3631                    goto allocate_in_reg;
3632                }
3633
3634                /* check if the current register has already been allocated
3635                   for another input aliased to an output */
3636                if (ts->val_type == TEMP_VAL_REG) {
3637                    int k2, i2;
3638                    reg = ts->reg;
3639                    for (k2 = 0; k2 < k; k2++) {
3640                        i2 = def->sorted_args[nb_oargs + k2];
3641                        if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3642                            reg == new_args[i2]) {
3643                            goto allocate_in_reg;
3644                        }
3645                    }
3646                }
3647                i_preferred_regs = o_preferred_regs;
3648            }
3649        }
3650
3651        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
3652        reg = ts->reg;
3653
3654        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3655            /* nothing to do : the constraint is satisfied */
3656        } else {
3657        allocate_in_reg:
3658            /* allocate a new register matching the constraint 
3659               and move the temporary register into it */
3660            temp_load(s, ts, tcg_target_available_regs[ts->type],
3661                      i_allocated_regs, 0);
3662            reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3663                                o_preferred_regs, ts->indirect_base);
3664            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3665                /*
3666                 * Cross register class move not supported.  Sync the
3667                 * temp back to its slot and load from there.
3668                 */
3669                temp_sync(s, ts, i_allocated_regs, 0, 0);
3670                tcg_out_ld(s, ts->type, reg,
3671                           ts->mem_base->reg, ts->mem_offset);
3672            }
3673        }
3674        new_args[i] = reg;
3675        const_args[i] = 0;
3676        tcg_regset_set_reg(i_allocated_regs, reg);
3677    }
3678    
3679    /* mark dead temporaries and free the associated registers */
3680    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3681        if (IS_DEAD_ARG(i)) {
3682            temp_dead(s, arg_temp(op->args[i]));
3683        }
3684    }
3685
3686    if (def->flags & TCG_OPF_BB_END) {
3687        tcg_reg_alloc_bb_end(s, i_allocated_regs);
3688    } else {
3689        if (def->flags & TCG_OPF_CALL_CLOBBER) {
3690            /* XXX: permit generic clobber register list ? */ 
3691            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3692                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3693                    tcg_reg_free(s, i, i_allocated_regs);
3694                }
3695            }
3696        }
3697        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3698            /* sync globals if the op has side effects and might trigger
3699               an exception. */
3700            sync_globals(s, i_allocated_regs);
3701        }
3702        
3703        /* satisfy the output constraints */
3704        for (k = 0; k < nb_oargs; k++) {
3705            i = def->sorted_args[k];
3706            arg = op->args[i];
3707            arg_ct = &def->args_ct[i];
3708            ts = arg_temp(arg);
3709
3710            /* ENV should not be modified.  */
3711            tcg_debug_assert(!ts->fixed_reg);
3712
3713            if ((arg_ct->ct & TCG_CT_ALIAS)
3714                && !const_args[arg_ct->alias_index]) {
3715                reg = new_args[arg_ct->alias_index];
3716            } else if (arg_ct->ct & TCG_CT_NEWREG) {
3717                reg = tcg_reg_alloc(s, arg_ct->u.regs,
3718                                    i_allocated_regs | o_allocated_regs,
3719                                    op->output_pref[k], ts->indirect_base);
3720            } else {
3721                reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3722                                    op->output_pref[k], ts->indirect_base);
3723            }
3724            tcg_regset_set_reg(o_allocated_regs, reg);
3725            if (ts->val_type == TEMP_VAL_REG) {
3726                s->reg_to_temp[ts->reg] = NULL;
3727            }
3728            ts->val_type = TEMP_VAL_REG;
3729            ts->reg = reg;
3730            /*
3731             * Temp value is modified, so the value kept in memory is
3732             * potentially not the same.
3733             */
3734            ts->mem_coherent = 0;
3735            s->reg_to_temp[reg] = ts;
3736            new_args[i] = reg;
3737        }
3738    }
3739
3740    /* emit instruction */
3741    if (def->flags & TCG_OPF_VECTOR) {
3742        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3743                       new_args, const_args);
3744    } else {
3745        tcg_out_op(s, op->opc, new_args, const_args);
3746    }
3747
3748    /* move the outputs in the correct register if needed */
3749    for (i = 0; i < nb_oargs; i++) {
3750        ts = arg_temp(op->args[i]);
3751
3752        /* ENV should not be modified.  */
3753        tcg_debug_assert(!ts->fixed_reg);
3754
3755        if (NEED_SYNC_ARG(i)) {
3756            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3757        } else if (IS_DEAD_ARG(i)) {
3758            temp_dead(s, ts);
3759        }
3760    }
3761}
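
/*
 * Editor's sketch of the constraint walk above for a hypothetical
 * "add_i32 d, a, b" whose output constraint aliases input a ("0"):
 *
 *   - input a: if still live after the op, it is first copied so the
 *     add may clobber the copy (the allocate_in_reg path); otherwise
 *     its register is reused directly;
 *   - input b: loaded via temp_load() into any register matching "r";
 *   - output d: takes new_args[] of the aliased input, after which
 *     reg_to_temp[] is updated and mem_coherent cleared before the
 *     instruction is emitted.
 */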
3762
3763#ifdef TCG_TARGET_STACK_GROWSUP
3764#define STACK_DIR(x) (-(x))
3765#else
3766#define STACK_DIR(x) (x)
3767#endif
3768
3769static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3770{
3771    const int nb_oargs = TCGOP_CALLO(op);
3772    const int nb_iargs = TCGOP_CALLI(op);
3773    const TCGLifeData arg_life = op->life;
3774    int flags, nb_regs, i;
3775    TCGReg reg;
3776    TCGArg arg;
3777    TCGTemp *ts;
3778    intptr_t stack_offset;
3779    size_t call_stack_size;
3780    tcg_insn_unit *func_addr;
3781    int allocate_args;
3782    TCGRegSet allocated_regs;
3783
3784    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3785    flags = op->args[nb_oargs + nb_iargs + 1];
3786
3787    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3788    if (nb_regs > nb_iargs) {
3789        nb_regs = nb_iargs;
3790    }
3791
3792    /* assign stack slots first */
3793    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3794    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) & 
3795        ~(TCG_TARGET_STACK_ALIGN - 1);
3796    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3797    if (allocate_args) {
3798        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3799           preallocate call stack */
3800        tcg_abort();
3801    }
3802
3803    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3804    for (i = nb_regs; i < nb_iargs; i++) {
3805        arg = op->args[nb_oargs + i];
3806#ifdef TCG_TARGET_STACK_GROWSUP
3807        stack_offset -= sizeof(tcg_target_long);
3808#endif
3809        if (arg != TCG_CALL_DUMMY_ARG) {
3810            ts = arg_temp(arg);
3811            temp_load(s, ts, tcg_target_available_regs[ts->type],
3812                      s->reserved_regs, 0);
3813            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3814        }
3815#ifndef TCG_TARGET_STACK_GROWSUP
3816        stack_offset += sizeof(tcg_target_long);
3817#endif
3818    }
3819    
3820    /* assign input registers */
3821    allocated_regs = s->reserved_regs;
3822    for (i = 0; i < nb_regs; i++) {
3823        arg = op->args[nb_oargs + i];
3824        if (arg != TCG_CALL_DUMMY_ARG) {
3825            ts = arg_temp(arg);
3826            reg = tcg_target_call_iarg_regs[i];
3827
3828            if (ts->val_type == TEMP_VAL_REG) {
3829                if (ts->reg != reg) {
3830                    tcg_reg_free(s, reg, allocated_regs);
3831                    if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3832                        /*
3833                         * Cross register class move not supported.  Sync the
3834                         * temp back to its slot and load from there.
3835                         */
3836                        temp_sync(s, ts, allocated_regs, 0, 0);
3837                        tcg_out_ld(s, ts->type, reg,
3838                                   ts->mem_base->reg, ts->mem_offset);
3839                    }
3840                }
3841            } else {
3842                TCGRegSet arg_set = 0;
3843
3844                tcg_reg_free(s, reg, allocated_regs);
3845                tcg_regset_set_reg(arg_set, reg);
3846                temp_load(s, ts, arg_set, allocated_regs, 0);
3847            }
3848
3849            tcg_regset_set_reg(allocated_regs, reg);
3850        }
3851    }
3852    
3853    /* mark dead temporaries and free the associated registers */
3854    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3855        if (IS_DEAD_ARG(i)) {
3856            temp_dead(s, arg_temp(op->args[i]));
3857        }
3858    }
3859    
3860    /* clobber call registers */
3861    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3862        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3863            tcg_reg_free(s, i, allocated_regs);
3864        }
3865    }
3866
3867    /* Save globals if they might be written by the helper, sync them if
3868       they might be read. */
3869    if (flags & TCG_CALL_NO_READ_GLOBALS) {
3870        /* Nothing to do */
3871    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3872        sync_globals(s, allocated_regs);
3873    } else {
3874        save_globals(s, allocated_regs);
3875    }
3876
3877    tcg_out_call(s, func_addr);
3878
3879    /* assign output registers and emit moves if needed */
3880    for (i = 0; i < nb_oargs; i++) {
3881        arg = op->args[i];
3882        ts = arg_temp(arg);
3883
3884        /* ENV should not be modified.  */
3885        tcg_debug_assert(!ts->fixed_reg);
3886
3887        reg = tcg_target_call_oarg_regs[i];
3888        tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3889        if (ts->val_type == TEMP_VAL_REG) {
3890            s->reg_to_temp[ts->reg] = NULL;
3891        }
3892        ts->val_type = TEMP_VAL_REG;
3893        ts->reg = reg;
3894        ts->mem_coherent = 0;
3895        s->reg_to_temp[reg] = ts;
3896        if (NEED_SYNC_ARG(i)) {
3897            temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3898        } else if (IS_DEAD_ARG(i)) {
3899            temp_dead(s, ts);
3900        }
3901    }
3902}
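
/*
 * Example (editor's sketch) of the stack-slot arithmetic above: with
 * 6 integer argument registers, an 8-byte tcg_target_long and a
 * hypothetical TCG_TARGET_STACK_ALIGN of 16, a call with nb_iargs = 9
 * places 3 arguments on the stack:
 *
 *     call_stack_size = (9 - 6) * 8 = 24, rounded up to 32
 *
 * which must fit in the TCG_STATIC_CALL_ARGS_SIZE area reserved by
 * the prologue, or tcg_abort() fires.
 */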
3903
3904#ifdef CONFIG_PROFILER
3905
3906/* avoid copy/paste errors */
3907#define PROF_ADD(to, from, field)                       \
3908    do {                                                \
3909        (to)->field += atomic_read(&((from)->field));   \
3910    } while (0)
3911
3912#define PROF_MAX(to, from, field)                                       \
3913    do {                                                                \
3914        typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3915        if (val__ > (to)->field) {                                      \
3916            (to)->field = val__;                                        \
3917        }                                                               \
3918    } while (0)
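
/*
 * Example usage (editor's note), as in tcg_profile_snapshot() below:
 *
 *     PROF_ADD(prof, orig, tb_count);      // prof->tb_count += orig's
 *     PROF_MAX(prof, orig, op_count_max);  // keep the running maximum
 */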
3919
3920/* Pass in a zeroed @prof */
3921static inline
3922void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3923{
3924    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3925    unsigned int i;
3926
3927    for (i = 0; i < n_ctxs; i++) {
3928        TCGContext *s = atomic_read(&tcg_ctxs[i]);
3929        const TCGProfile *orig = &s->prof;
3930
3931        if (counters) {
3932            PROF_ADD(prof, orig, cpu_exec_time);
3933            PROF_ADD(prof, orig, tb_count1);
3934            PROF_ADD(prof, orig, tb_count);
3935            PROF_ADD(prof, orig, op_count);
3936            PROF_MAX(prof, orig, op_count_max);
3937            PROF_ADD(prof, orig, temp_count);
3938            PROF_MAX(prof, orig, temp_count_max);
3939            PROF_ADD(prof, orig, del_op_count);
3940            PROF_ADD(prof, orig, code_in_len);
3941            PROF_ADD(prof, orig, code_out_len);
3942            PROF_ADD(prof, orig, search_out_len);
3943            PROF_ADD(prof, orig, interm_time);
3944            PROF_ADD(prof, orig, code_time);
3945            PROF_ADD(prof, orig, la_time);
3946            PROF_ADD(prof, orig, opt_time);
3947            PROF_ADD(prof, orig, restore_count);
3948            PROF_ADD(prof, orig, restore_time);
3949        }
3950        if (table) {
3951            int i;
3952
3953            for (i = 0; i < NB_OPS; i++) {
3954                PROF_ADD(prof, orig, table_op_count[i]);
3955            }
3956        }
3957    }
3958}
3959
3960#undef PROF_ADD
3961#undef PROF_MAX
3962
3963static void tcg_profile_snapshot_counters(TCGProfile *prof)
3964{
3965    tcg_profile_snapshot(prof, true, false);
3966}
3967
3968static void tcg_profile_snapshot_table(TCGProfile *prof)
3969{
3970    tcg_profile_snapshot(prof, false, true);
3971}
3972
3973void tcg_dump_op_count(void)
3974{
3975    TCGProfile prof = {};
3976    int i;
3977
3978    tcg_profile_snapshot_table(&prof);
3979    for (i = 0; i < NB_OPS; i++) {
3980        qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
3981                    prof.table_op_count[i]);
3982    }
3983}
3984
3985int64_t tcg_cpu_exec_time(void)
3986{
3987    unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3988    unsigned int i;
3989    int64_t ret = 0;
3990
3991    for (i = 0; i < n_ctxs; i++) {
3992        const TCGContext *s = atomic_read(&tcg_ctxs[i]);
3993        const TCGProfile *prof = &s->prof;
3994
3995        ret += atomic_read(&prof->cpu_exec_time);
3996    }
3997    return ret;
3998}
3999#else
4000void tcg_dump_op_count(void)
4001{
4002    qemu_printf("[TCG profiler not compiled]\n");
4003}
4004
4005int64_t tcg_cpu_exec_time(void)
4006{
4007    error_report("%s: TCG profiler not compiled", __func__);
4008    exit(EXIT_FAILURE);
4009}
4010#endif
4011
4012
4013int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4014{
4015#ifdef CONFIG_PROFILER
4016    TCGProfile *prof = &s->prof;
4017#endif
4018    int i, num_insns;
4019    TCGOp *op;
4020
4021#ifdef CONFIG_PROFILER
4022    {
4023        int n = 0;
4024
4025        QTAILQ_FOREACH(op, &s->ops, link) {
4026            n++;
4027        }
4028        atomic_set(&prof->op_count, prof->op_count + n);
4029        if (n > prof->op_count_max) {
4030            atomic_set(&prof->op_count_max, n);
4031        }
4032
4033        n = s->nb_temps;
4034        atomic_set(&prof->temp_count, prof->temp_count + n);
4035        if (n > prof->temp_count_max) {
4036            atomic_set(&prof->temp_count_max, n);
4037        }
4038    }
4039#endif
4040
4041#ifdef DEBUG_DISAS
4042    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4043                 && qemu_log_in_addr_range(tb->pc))) {
4044        qemu_log_lock();
4045        qemu_log("OP:\n");
4046        tcg_dump_ops(s, false);
4047        qemu_log("\n");
4048        qemu_log_unlock();
4049    }
4050#endif
4051
4052#ifdef CONFIG_DEBUG_TCG
4053    /* Ensure all labels referenced have been emitted.  */
4054    {
4055        TCGLabel *l;
4056        bool error = false;
4057
4058        QSIMPLEQ_FOREACH(l, &s->labels, next) {
4059            if (unlikely(!l->present) && l->refs) {
4060                qemu_log_mask(CPU_LOG_TB_OP,
4061                              "$L%d referenced but not present.\n", l->id);
4062                error = true;
4063            }
4064        }
4065        assert(!error);
4066    }
4067#endif
4068
4069#ifdef CONFIG_PROFILER
4070    atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4071#endif
4072
4073#ifdef USE_TCG_OPTIMIZATIONS
4074    tcg_optimize(s);
4075#endif
4076
4077#ifdef CONFIG_PROFILER
4078    atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4079    atomic_set(&prof->la_time, prof->la_time - profile_getclock());
4080#endif
4081
4082    reachable_code_pass(s);
4083    liveness_pass_1(s);
4084
4085    if (s->nb_indirects > 0) {
4086#ifdef DEBUG_DISAS
4087        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4088                     && qemu_log_in_addr_range(tb->pc))) {
4089            qemu_log_lock();
4090            qemu_log("OP before indirect lowering:\n");
4091            tcg_dump_ops(s, false);
4092            qemu_log("\n");
4093            qemu_log_unlock();
4094        }
4095#endif
4096        /* Replace indirect temps with direct temps.  */
4097        if (liveness_pass_2(s)) {
4098            /* If changes were made, re-run liveness.  */
4099            liveness_pass_1(s);
4100        }
4101    }
4102
4103#ifdef CONFIG_PROFILER
4104    atomic_set(&prof->la_time, prof->la_time + profile_getclock());
4105#endif
4106
4107#ifdef DEBUG_DISAS
4108    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4109                 && qemu_log_in_addr_range(tb->pc))) {
4110        qemu_log_lock();
4111        qemu_log("OP after optimization and liveness analysis:\n");
4112        tcg_dump_ops(s, true);
4113        qemu_log("\n");
4114        qemu_log_unlock();
4115    }
4116#endif
4117
4118    tcg_reg_alloc_start(s);
4119
4120    s->code_buf = tb->tc.ptr;
4121    s->code_ptr = tb->tc.ptr;
4122
4123#ifdef TCG_TARGET_NEED_LDST_LABELS
4124    QSIMPLEQ_INIT(&s->ldst_labels);
4125#endif
4126#ifdef TCG_TARGET_NEED_POOL_LABELS
4127    s->pool_labels = NULL;
4128#endif
4129
4130    num_insns = -1;
4131    QTAILQ_FOREACH(op, &s->ops, link) {
4132        TCGOpcode opc = op->opc;
4133
4134#ifdef CONFIG_PROFILER
4135        atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4136#endif
4137
4138        switch (opc) {
4139        case INDEX_op_mov_i32:
4140        case INDEX_op_mov_i64:
4141        case INDEX_op_mov_vec:
4142            tcg_reg_alloc_mov(s, op);
4143            break;
4144        case INDEX_op_movi_i32:
4145        case INDEX_op_movi_i64:
4146        case INDEX_op_dupi_vec:
4147            tcg_reg_alloc_movi(s, op);
4148            break;
4149        case INDEX_op_dup_vec:
4150            tcg_reg_alloc_dup(s, op);
4151            break;
4152        case INDEX_op_insn_start:
4153            if (num_insns >= 0) {
4154                size_t off = tcg_current_code_size(s);
4155                s->gen_insn_end_off[num_insns] = off;
4156                /* Assert that we do not overflow our stored offset.  */
4157                assert(s->gen_insn_end_off[num_insns] == off);
4158            }
4159            num_insns++;
4160            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4161                target_ulong a;
4162#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4163                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4164#else
4165                a = op->args[i];
4166#endif
4167                s->gen_insn_data[num_insns][i] = a;
4168            }
4169            break;
4170        case INDEX_op_discard:
4171            temp_dead(s, arg_temp(op->args[0]));
4172            break;
4173        case INDEX_op_set_label:
4174            tcg_reg_alloc_bb_end(s, s->reserved_regs);
4175            tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
4176            break;
4177        case INDEX_op_call:
4178            tcg_reg_alloc_call(s, op);
4179            break;
4180        default:
4181            /* Sanity check that we've not introduced any unhandled opcodes. */
4182            tcg_debug_assert(tcg_op_supported(opc));
4183            /* Note: it would be much faster to have specialized
4184               register allocator functions for some common argument
4185               patterns. */
4186            tcg_reg_alloc_op(s, op);
4187            break;
4188        }
4189#ifdef CONFIG_DEBUG_TCG
4190        check_regs(s);
4191#endif
4192        /* Test for (pending) buffer overflow.  The assumption is that any
4193           one operation beginning below the high water mark cannot overrun
4194           the buffer completely.  Thus we can test for overflow after
4195           generating code without having to check during generation.  */
4196        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4197            return -1;
4198        }
4199        /* Test for TB overflow, as seen by gen_insn_end_off.  */
4200        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4201            return -2;
4202        }
4203    }
4204    tcg_debug_assert(num_insns >= 0);
4205    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4206
4207    /* Generate TB finalization at the end of block */
4208#ifdef TCG_TARGET_NEED_LDST_LABELS
4209    i = tcg_out_ldst_finalize(s);
4210    if (i < 0) {
4211        return i;
4212    }
4213#endif
4214#ifdef TCG_TARGET_NEED_POOL_LABELS
4215    i = tcg_out_pool_finalize(s);
4216    if (i < 0) {
4217        return i;
4218    }
4219#endif
4220    if (!tcg_resolve_relocs(s)) {
4221        return -2;
4222    }
4223
4224    /* flush instruction cache */
4225    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
4226
4227    return tcg_current_code_size(s);
4228}
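
/*
 * Editor's note on the return convention of tcg_gen_code(): -1 means
 * the host code buffer ran past code_gen_highwater, -2 that the TB
 * outgrew what gen_insn_end_off can record or that relocations could
 * not be resolved.  A hypothetical caller (the real handling lives in
 * the translator, not here) would look like:
 *
 *     int size = tcg_gen_code(s, tb);
 *     if (size == -1) {
 *         // flush the code buffer and retranslate
 *     } else if (size == -2) {
 *         // retry with fewer guest instructions in the TB
 *     }
 */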
4229
4230#ifdef CONFIG_PROFILER
4231void tcg_dump_info(void)
4232{
4233    TCGProfile prof = {};
4234    const TCGProfile *s;
4235    int64_t tb_count;
4236    int64_t tb_div_count;
4237    int64_t tot;
4238
4239    tcg_profile_snapshot_counters(&prof);
4240    s = &prof;
4241    tb_count = s->tb_count;
4242    tb_div_count = tb_count ? tb_count : 1;
4243    tot = s->interm_time + s->code_time;
4244
4245    qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4246                tot, tot / 2.4e9);
4247    qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4248                " %0.1f%%)\n",
4249                tb_count, s->tb_count1 - tb_count,
4250                (double)(s->tb_count1 - s->tb_count)
4251                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4252    qemu_printf("avg ops/TB          %0.1f max=%d\n",
4253                (double)s->op_count / tb_div_count, s->op_count_max);
4254    qemu_printf("deleted ops/TB      %0.2f\n",
4255                (double)s->del_op_count / tb_div_count);
4256    qemu_printf("avg temps/TB        %0.2f max=%d\n",
4257                (double)s->temp_count / tb_div_count, s->temp_count_max);
4258    qemu_printf("avg host code/TB    %0.1f\n",
4259                (double)s->code_out_len / tb_div_count);
4260    qemu_printf("avg search data/TB  %0.1f\n",
4261                (double)s->search_out_len / tb_div_count);
4262    
4263    qemu_printf("cycles/op           %0.1f\n",
4264                s->op_count ? (double)tot / s->op_count : 0);
4265    qemu_printf("cycles/in byte      %0.1f\n",
4266                s->code_in_len ? (double)tot / s->code_in_len : 0);
4267    qemu_printf("cycles/out byte     %0.1f\n",
4268                s->code_out_len ? (double)tot / s->code_out_len : 0);
4269    qemu_printf("cycles/search byte     %0.1f\n",
4270                s->search_out_len ? (double)tot / s->search_out_len : 0);
4271    if (tot == 0) {
4272        tot = 1;
4273    }
4274    qemu_printf("  gen_interm time   %0.1f%%\n",
4275                (double)s->interm_time / tot * 100.0);
4276    qemu_printf("  gen_code time     %0.1f%%\n",
4277                (double)s->code_time / tot * 100.0);
4278    qemu_printf("optim./code time    %0.1f%%\n",
4279                (double)s->opt_time / (s->code_time ? s->code_time : 1)
4280                * 100.0);
4281    qemu_printf("liveness/code time  %0.1f%%\n",
4282                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4283    qemu_printf("cpu_restore count   %" PRId64 "\n",
4284                s->restore_count);
4285    qemu_printf("  avg cycles        %0.1f\n",
4286                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4287}
4288#else
4289void tcg_dump_info(void)
4290{
4291    qemu_printf("[TCG profiler not compiled]\n");
4292}
4293#endif
4294
4295#ifdef ELF_HOST_MACHINE
4296/* In order to use this feature, the backend needs to do three things:
4297
4298   (1) Define ELF_HOST_MACHINE to indicate both what value to
4299       put into the ELF image and to indicate support for the feature.
4300
4301   (2) Define tcg_register_jit.  This should create a buffer containing
4302       the contents of a .debug_frame section that describes the post-
4303       prologue unwind info for the tcg machine.
4304
4305   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4306*/
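
/* Editor's sketch of step (2) for a hypothetical backend -- the names
   and frame contents are illustrative only, not a real
   tcg-target.inc.c:

       void tcg_register_jit(void *buf, size_t buf_size)
       {
           static const DebugFrameHeader debug_frame = {
               .cie = { ... describe the prologue's CIE here ... },
               .fde = { ... func_start/func_len are patched later ... },
           };
           tcg_register_jit_int(buf, buf_size,
                                &debug_frame, sizeof(debug_frame));
       }
*/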
4307
4308/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4309typedef enum {
4310    JIT_NOACTION = 0,
4311    JIT_REGISTER_FN,
4312    JIT_UNREGISTER_FN
4313} jit_actions_t;
4314
4315struct jit_code_entry {
4316    struct jit_code_entry *next_entry;
4317    struct jit_code_entry *prev_entry;
4318    const void *symfile_addr;
4319    uint64_t symfile_size;
4320};
4321
4322struct jit_descriptor {
4323    uint32_t version;
4324    uint32_t action_flag;
4325    struct jit_code_entry *relevant_entry;
4326    struct jit_code_entry *first_entry;
4327};
4328
4329void __jit_debug_register_code(void) __attribute__((noinline));
4330void __jit_debug_register_code(void)
4331{
4332    asm("");
4333}
4334
4335/* Must statically initialize the version, because GDB may check
4336   the version before we can set it.  */
4337struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4338
4339/* End GDB interface.  */
4340
4341static int find_string(const char *strtab, const char *str)
4342{
4343    const char *p = strtab + 1;
4344
4345    while (1) {
4346        if (strcmp(p, str) == 0) {
4347            return p - strtab;
4348        }
4349        p += strlen(p) + 1;
4350    }
4351}
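
/*
 * Example (editor's note): with the .str table used below,
 *
 *     "\0" ".text\0" ".debug_info\0" ...
 *
 * find_string(strtab, ".text") returns 1 and
 * find_string(strtab, ".debug_info") returns 7 -- i.e. the byte
 * offset of the entry, suitable for an ELF sh_name or st_name field.
 * Note the loop assumes the string is present; there is no bound
 * check against the end of the table.
 */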
4352
4353static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4354                                 const void *debug_frame,
4355                                 size_t debug_frame_size)
4356{
4357    struct __attribute__((packed)) DebugInfo {
4358        uint32_t  len;
4359        uint16_t  version;
4360        uint32_t  abbrev;
4361        uint8_t   ptr_size;
4362        uint8_t   cu_die;
4363        uint16_t  cu_lang;
4364        uintptr_t cu_low_pc;
4365        uintptr_t cu_high_pc;
4366        uint8_t   fn_die;
4367        char      fn_name[16];
4368        uintptr_t fn_low_pc;
4369        uintptr_t fn_high_pc;
4370        uint8_t   cu_eoc;
4371    };
4372
4373    struct ElfImage {
4374        ElfW(Ehdr) ehdr;
4375        ElfW(Phdr) phdr;
4376        ElfW(Shdr) shdr[7];
4377        ElfW(Sym)  sym[2];
4378        struct DebugInfo di;
4379        uint8_t    da[24];
4380        char       str[80];
4381    };
4382
4383    struct ElfImage *img;
4384
4385    static const struct ElfImage img_template = {
4386        .ehdr = {
4387            .e_ident[EI_MAG0] = ELFMAG0,
4388            .e_ident[EI_MAG1] = ELFMAG1,
4389            .e_ident[EI_MAG2] = ELFMAG2,
4390            .e_ident[EI_MAG3] = ELFMAG3,
4391            .e_ident[EI_CLASS] = ELF_CLASS,
4392            .e_ident[EI_DATA] = ELF_DATA,
4393            .e_ident[EI_VERSION] = EV_CURRENT,
4394            .e_type = ET_EXEC,
4395            .e_machine = ELF_HOST_MACHINE,
4396            .e_version = EV_CURRENT,
4397            .e_phoff = offsetof(struct ElfImage, phdr),
4398            .e_shoff = offsetof(struct ElfImage, shdr),
4399            .e_ehsize = sizeof(ElfW(Ehdr)),
4400            .e_phentsize = sizeof(ElfW(Phdr)),
4401            .e_phnum = 1,
4402            .e_shentsize = sizeof(ElfW(Shdr)),
4403            .e_shnum = ARRAY_SIZE(img->shdr),
4404            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4405#ifdef ELF_HOST_FLAGS
4406            .e_flags = ELF_HOST_FLAGS,
4407#endif
4408#ifdef ELF_OSABI
4409            .e_ident[EI_OSABI] = ELF_OSABI,
4410#endif
4411        },
4412        .phdr = {
4413            .p_type = PT_LOAD,
4414            .p_flags = PF_X,
4415        },
4416        .shdr = {
4417            [0] = { .sh_type = SHT_NULL },
4418            /* Trick: The contents of code_gen_buffer are not present in
4419               this fake ELF file; that got allocated elsewhere.  Therefore
4420               we mark .text as SHT_NOBITS (similar to .bss) so that readers
4421               will not look for contents.  We can record any address.  */
4422            [1] = { /* .text */
4423                .sh_type = SHT_NOBITS,
4424                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4425            },
4426            [2] = { /* .debug_info */
4427                .sh_type = SHT_PROGBITS,
4428                .sh_offset = offsetof(struct ElfImage, di),
4429                .sh_size = sizeof(struct DebugInfo),
4430            },
4431            [3] = { /* .debug_abbrev */
4432                .sh_type = SHT_PROGBITS,
4433                .sh_offset = offsetof(struct ElfImage, da),
4434                .sh_size = sizeof(img->da),
4435            },
4436            [4] = { /* .debug_frame */
4437                .sh_type = SHT_PROGBITS,
4438                .sh_offset = sizeof(struct ElfImage),
4439            },
4440            [5] = { /* .symtab */
4441                .sh_type = SHT_SYMTAB,
4442                .sh_offset = offsetof(struct ElfImage, sym),
4443                .sh_size = sizeof(img->sym),
4444                .sh_info = 1,
4445                .sh_link = ARRAY_SIZE(img->shdr) - 1,
4446                .sh_entsize = sizeof(ElfW(Sym)),
4447            },
4448            [6] = { /* .strtab */
4449                .sh_type = SHT_STRTAB,
4450                .sh_offset = offsetof(struct ElfImage, str),
4451                .sh_size = sizeof(img->str),
4452            }
4453        },
4454        .sym = {
4455            [1] = { /* code_gen_buffer */
4456                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4457                .st_shndx = 1,
4458            }
4459        },
4460        .di = {
4461            .len = sizeof(struct DebugInfo) - 4,
4462            .version = 2,
4463            .ptr_size = sizeof(void *),
4464            .cu_die = 1,
4465            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4466            .fn_die = 2,
4467            .fn_name = "code_gen_buffer"
4468        },
4469        .da = {
4470            1,          /* abbrev number (the cu) */
4471            0x11, 1,    /* DW_TAG_compile_unit, has children */
4472            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4473            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4474            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4475            0, 0,       /* end of abbrev */
4476            2,          /* abbrev number (the fn) */
4477            0x2e, 0,    /* DW_TAG_subprogram, no children */
4478            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4479            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4480            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4481            0, 0,       /* end of abbrev */
4482            0           /* no more abbrev */
4483        },
4484        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4485               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4486    };
4487
4488    /* We only need a single jit entry; statically allocate it.  */
4489    static struct jit_code_entry one_entry;
4490
4491    uintptr_t buf = (uintptr_t)buf_ptr;
4492    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4493    DebugFrameHeader *dfh;
4494
4495    img = g_malloc(img_size);
4496    *img = img_template;
4497
4498    img->phdr.p_vaddr = buf;
4499    img->phdr.p_paddr = buf;
4500    img->phdr.p_memsz = buf_size;
4501
4502    img->shdr[1].sh_name = find_string(img->str, ".text");
4503    img->shdr[1].sh_addr = buf;
4504    img->shdr[1].sh_size = buf_size;
4505
4506    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4507    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4508
4509    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4510    img->shdr[4].sh_size = debug_frame_size;
4511
4512    img->shdr[5].sh_name = find_string(img->str, ".symtab");
4513    img->shdr[6].sh_name = find_string(img->str, ".strtab");
4514
4515    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4516    img->sym[1].st_value = buf;
4517    img->sym[1].st_size = buf_size;
4518
4519    img->di.cu_low_pc = buf;
4520    img->di.cu_high_pc = buf + buf_size;
4521    img->di.fn_low_pc = buf;
4522    img->di.fn_high_pc = buf + buf_size;
4523
4524    dfh = (DebugFrameHeader *)(img + 1);
4525    memcpy(dfh, debug_frame, debug_frame_size);
4526    dfh->fde.func_start = buf;
4527    dfh->fde.func_len = buf_size;
4528
4529#ifdef DEBUG_JIT
4530    /* Enable this block to be able to debug the ELF image file creation.
4531       One can use readelf, objdump, or other inspection utilities.  */
4532    {
4533        FILE *f = fopen("/tmp/qemu.jit", "w+b");
4534        if (f) {
4535            if (fwrite(img, img_size, 1, f) != 1) {
4536                /* Avoid stupid unused return value warning for fwrite.  */
4537            }
4538            fclose(f);
4539        }
4540    }
4541#endif
4542
4543    one_entry.symfile_addr = img;
4544    one_entry.symfile_size = img_size;
4545
4546    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4547    __jit_debug_descriptor.relevant_entry = &one_entry;
4548    __jit_debug_descriptor.first_entry = &one_entry;
4549    __jit_debug_register_code();
4550}
4551#else
4552/* No support for the feature.  Provide the entry point expected by exec.c,
4553   and implement the internal function we declared earlier.  */
4554
4555static void tcg_register_jit_int(void *buf, size_t size,
4556                                 const void *debug_frame,
4557                                 size_t debug_frame_size)
4558{
4559}
4560
4561void tcg_register_jit(void *buf, size_t buf_size)
4562{
4563}
4564#endif /* ELF_HOST_MACHINE */
4565
4566#if !TCG_TARGET_MAYBE_vec
4567void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4568{
4569    g_assert_not_reached();
4570}
4571#endif
4572