qemu/tests/plugin/howvec.c
<<
>>
Prefs
   1/*
   2 * Copyright (C) 2019, Alex Bennée <alex.bennee@linaro.org>
   3 *
   4 * How vectorised is this code?
   5 *
   6 * Attempt to measure the amount of vectorisation that has been done
   7 * on some code by counting classes of instruction.
   8 *
   9 * License: GNU GPL, version 2 or later.
  10 *   See the COPYING file in the top-level directory.
  11 */
  12#include <inttypes.h>
  13#include <assert.h>
  14#include <stdlib.h>
  15#include <inttypes.h>
  16#include <string.h>
  17#include <unistd.h>
  18#include <stdio.h>
  19#include <glib.h>
  20
  21#include <qemu-plugin.h>
  22
  23QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
  24
  25#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
  26
  27typedef enum {
  28    COUNT_CLASS,
  29    COUNT_INDIVIDUAL,
  30    COUNT_NONE
  31} CountType;
  32
  33static int limit = 50;
  34static bool do_inline;
  35static bool verbose;
  36
  37static GMutex lock;
  38static GHashTable *insns;
  39
  40typedef struct {
  41    const char *class;
  42    const char *opt;
  43    uint32_t mask;
  44    uint32_t pattern;
  45    CountType what;
  46    uint64_t count;
  47} InsnClassExecCount;
  48
  49typedef struct {
  50    char *insn;
  51    uint32_t opcode;
  52    uint64_t count;
  53    InsnClassExecCount *class;
  54} InsnExecCount;
  55
  56/*
  57 * Matchers for classes of instructions, order is important.
  58 *
  59 * Your most precise match must be before looser matches. If no match
  60 * is found in the table we can create an individual entry.
  61 *
  62 * 31..28 27..24 23..20 19..16 15..12 11..8 7..4 3..0
  63 */
  64static InsnClassExecCount aarch64_insn_classes[] = {
  65    /* "Reserved"" */
  66    { "  UDEF",              "udef",   0xffff0000, 0x00000000, COUNT_NONE},
  67    { "  SVE",               "sve",    0x1e000000, 0x04000000, COUNT_CLASS},
  68    { "Reserved",            "res",    0x1e000000, 0x00000000, COUNT_CLASS},
  69    /* Data Processing Immediate */
  70    { "  PCrel addr",        "pcrel",  0x1f000000, 0x10000000, COUNT_CLASS},
  71    { "  Add/Sub (imm,tags)","asit",   0x1f800000, 0x11800000, COUNT_CLASS},
  72    { "  Add/Sub (imm)",     "asi",    0x1f000000, 0x11000000, COUNT_CLASS},
  73    { "  Logical (imm)",     "logi",   0x1f800000, 0x12000000, COUNT_CLASS},
  74    { "  Move Wide (imm)",   "movwi",  0x1f800000, 0x12800000, COUNT_CLASS},
  75    { "  Bitfield",          "bitf",   0x1f800000, 0x13000000, COUNT_CLASS},
  76    { "  Extract",           "extr",   0x1f800000, 0x13800000, COUNT_CLASS},
  77    { "Data Proc Imm",       "dpri",   0x1c000000, 0x10000000, COUNT_CLASS},
  78    /* Branches */
  79    { "  Cond Branch (imm)", "cndb",   0xfe000000, 0x54000000, COUNT_CLASS},
  80    { "  Exception Gen",     "excp",   0xff000000, 0xd4000000, COUNT_CLASS},
  81    { "    NOP",             "nop",    0xffffffff, 0xd503201f, COUNT_NONE},
  82    { "  Hints",             "hint",   0xfffff000, 0xd5032000, COUNT_CLASS},
  83    { "  Barriers",          "barr",   0xfffff000, 0xd5033000, COUNT_CLASS},
  84    { "  PSTATE",            "psta",   0xfff8f000, 0xd5004000, COUNT_CLASS},
  85    { "  System Insn",       "sins",   0xffd80000, 0xd5080000, COUNT_CLASS},
  86    { "  System Reg",        "sreg",   0xffd00000, 0xd5100000, COUNT_CLASS},
  87    { "  Branch (reg)",      "breg",   0xfe000000, 0xd6000000, COUNT_CLASS},
  88    { "  Branch (imm)",      "bimm",   0x7c000000, 0x14000000, COUNT_CLASS},
  89    { "  Cmp & Branch",      "cmpb",   0x7e000000, 0x34000000, COUNT_CLASS},
  90    { "  Tst & Branch",      "tstb",   0x7e000000, 0x36000000, COUNT_CLASS},
  91    { "Branches",            "branch", 0x1c000000, 0x14000000, COUNT_CLASS},
  92    /* Loads and Stores */
  93    { "  AdvSimd ldstmult",  "advlsm", 0xbfbf0000, 0x0c000000, COUNT_CLASS},
  94    { "  AdvSimd ldstmult++","advlsmp",0xbfb00000, 0x0c800000, COUNT_CLASS},
  95    { "  AdvSimd ldst",      "advlss", 0xbf9f0000, 0x0d000000, COUNT_CLASS},
  96    { "  AdvSimd ldst++",    "advlssp",0xbf800000, 0x0d800000, COUNT_CLASS},
  97    { "  ldst excl",         "ldstx",  0x3f000000, 0x08000000, COUNT_CLASS},
  98    { "    Prefetch",        "prfm",   0xff000000, 0xd8000000, COUNT_CLASS},
  99    { "  Load Reg (lit)",    "ldlit",  0x1b000000, 0x18000000, COUNT_CLASS},
 100    { "  ldst noalloc pair", "ldstnap",0x3b800000, 0x28000000, COUNT_CLASS},
 101    { "  ldst pair",         "ldstp",  0x38000000, 0x28000000, COUNT_CLASS},
 102    { "  ldst reg",          "ldstr",  0x3b200000, 0x38000000, COUNT_CLASS},
 103    { "  Atomic ldst",       "atomic", 0x3b200c00, 0x38200000, COUNT_CLASS},
 104    { "  ldst reg (reg off)","ldstro", 0x3b200b00, 0x38200800, COUNT_CLASS},
 105    { "  ldst reg (pac)",    "ldstpa", 0x3b200200, 0x38200800, COUNT_CLASS},
 106    { "  ldst reg (imm)",    "ldsti",  0x3b000000, 0x39000000, COUNT_CLASS},
 107    { "Loads & Stores",      "ldst",   0x0a000000, 0x08000000, COUNT_CLASS},
 108    /* Data Processing Register */
 109    { "Data Proc Reg",       "dprr",   0x0e000000, 0x0a000000, COUNT_CLASS},
 110    /* Scalar FP */
 111    { "Scalar FP ",          "fpsimd", 0x0e000000, 0x0e000000, COUNT_CLASS},
 112    /* Unclassified */
 113    { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_CLASS},
 114};
 115
 116static InsnClassExecCount sparc32_insn_classes[] = {
 117    { "Call",                "call",   0xc0000000, 0x40000000, COUNT_CLASS},
 118    { "Branch ICond",        "bcc",    0xc1c00000, 0x00800000, COUNT_CLASS},
 119    { "Branch Fcond",        "fbcc",   0xc1c00000, 0x01800000, COUNT_CLASS},
 120    { "SetHi",               "sethi",  0xc1c00000, 0x01000000, COUNT_CLASS},
 121    { "FPU ALU",             "fpu",    0xc1f00000, 0x81a00000, COUNT_CLASS},
 122    { "ALU",                 "alu",    0xc0000000, 0x80000000, COUNT_CLASS},
 123    { "Load/Store",          "ldst",   0xc0000000, 0xc0000000, COUNT_CLASS},
 124    /* Unclassified */
 125    { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
 126};
 127
 128static InsnClassExecCount sparc64_insn_classes[] = {
 129    { "SetHi & Branches",     "op0",   0xc0000000, 0x00000000, COUNT_CLASS},
 130    { "Call",                 "op1",   0xc0000000, 0x40000000, COUNT_CLASS},
 131    { "Arith/Logical/Move",   "op2",   0xc0000000, 0x80000000, COUNT_CLASS},
 132    { "Arith/Logical/Move",   "op3",   0xc0000000, 0xc0000000, COUNT_CLASS},
 133    /* Unclassified */
 134    { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
 135};
 136
 137/* Default matcher for currently unclassified architectures */
 138static InsnClassExecCount default_insn_classes[] = {
 139    { "Unclassified",        "unclas", 0x00000000, 0x00000000, COUNT_INDIVIDUAL},
 140};
 141
 142typedef struct {
 143    const char *qemu_target;
 144    InsnClassExecCount *table;
 145    int table_sz;
 146} ClassSelector;
 147
 148static ClassSelector class_tables[] =
 149{
 150    { "aarch64", aarch64_insn_classes, ARRAY_SIZE(aarch64_insn_classes) },
 151    { "sparc",   sparc32_insn_classes, ARRAY_SIZE(sparc32_insn_classes) },
 152    { "sparc64", sparc64_insn_classes, ARRAY_SIZE(sparc64_insn_classes) },
 153    { NULL, default_insn_classes, ARRAY_SIZE(default_insn_classes) },
 154};
 155
 156static InsnClassExecCount *class_table;
 157static int class_table_sz;
 158
 159static gint cmp_exec_count(gconstpointer a, gconstpointer b)
 160{
 161    InsnExecCount *ea = (InsnExecCount *) a;
 162    InsnExecCount *eb = (InsnExecCount *) b;
 163    return ea->count > eb->count ? -1 : 1;
 164}
 165
 166static void plugin_exit(qemu_plugin_id_t id, void *p)
 167{
 168    g_autoptr(GString) report = g_string_new("Instruction Classes:\n");
 169    int i;
 170    GList *counts;
 171    InsnClassExecCount *class = NULL;
 172
 173    for (i = 0; i < class_table_sz; i++) {
 174        class = &class_table[i];
 175        switch (class->what) {
 176        case COUNT_CLASS:
 177            if (class->count || verbose) {
 178                g_string_append_printf(report, "Class: %-24s\t(%ld hits)\n",
 179                                       class->class,
 180                                       class->count);
 181            }
 182            break;
 183        case COUNT_INDIVIDUAL:
 184            g_string_append_printf(report, "Class: %-24s\tcounted individually\n",
 185                                   class->class);
 186            break;
 187        case COUNT_NONE:
 188            g_string_append_printf(report, "Class: %-24s\tnot counted\n",
 189                                   class->class);
 190            break;
 191        default:
 192            break;
 193        }
 194    }
 195
 196    counts = g_hash_table_get_values(insns);
 197    if (counts && g_list_next(counts)) {
 198        GList *it;
 199
 200        g_string_append_printf(report,"Individual Instructions:\n");
 201
 202        it = g_list_sort(counts, cmp_exec_count);
 203
 204        for (i = 0; i < limit && it->next; i++, it = it->next) {
 205            InsnExecCount *rec = (InsnExecCount *) it->data;
 206            g_string_append_printf(report, "Instr: %-24s\t(%ld hits)\t(op=%#08x/%s)\n",
 207                                   rec->insn,
 208                                   rec->count,
 209                                   rec->opcode,
 210                                   rec->class ?
 211                                   rec->class->class : "un-categorised");
 212        }
 213        g_list_free(it);
 214    }
 215
 216    qemu_plugin_outs(report->str);
 217}
 218
 219static void plugin_init(void)
 220{
 221    insns = g_hash_table_new(NULL, g_direct_equal);
 222}
 223
 224static void vcpu_insn_exec_before(unsigned int cpu_index, void *udata)
 225{
 226    uint64_t *count = (uint64_t *) udata;
 227    (*count)++;
 228}
 229
 230static uint64_t * find_counter(struct qemu_plugin_insn *insn)
 231{
 232    int i;
 233    uint64_t *cnt = NULL;
 234    uint32_t opcode;
 235    InsnClassExecCount *class = NULL;
 236
 237    /*
 238     * We only match the first 32 bits of the instruction which is
 239     * fine for most RISCs but a bit limiting for CISC architectures.
 240     * They would probably benefit from a more tailored plugin.
 241     * However we can fall back to individual instruction counting.
 242     */
 243    opcode = *((uint32_t *)qemu_plugin_insn_data(insn));
 244
 245    for (i = 0; !cnt && i < class_table_sz; i++) {
 246        class = &class_table[i];
 247        uint32_t masked_bits = opcode & class->mask;
 248        if (masked_bits == class->pattern) {
 249            break;
 250        }
 251    }
 252
 253    g_assert(class);
 254
 255    switch (class->what) {
 256    case COUNT_NONE:
 257        return NULL;
 258    case COUNT_CLASS:
 259        return &class->count;
 260    case COUNT_INDIVIDUAL:
 261    {
 262        InsnExecCount *icount;
 263
 264        g_mutex_lock(&lock);
 265        icount = (InsnExecCount *) g_hash_table_lookup(insns,
 266                                                       GUINT_TO_POINTER(opcode));
 267
 268        if (!icount) {
 269            icount = g_new0(InsnExecCount, 1);
 270            icount->opcode = opcode;
 271            icount->insn = qemu_plugin_insn_disas(insn);
 272            icount->class = class;
 273
 274            g_hash_table_insert(insns, GUINT_TO_POINTER(opcode),
 275                                (gpointer) icount);
 276        }
 277        g_mutex_unlock(&lock);
 278
 279        return &icount->count;
 280    }
 281    default:
 282        g_assert_not_reached();
 283    }
 284
 285    return NULL;
 286}
 287
 288static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
 289{
 290    size_t n = qemu_plugin_tb_n_insns(tb);
 291    size_t i;
 292
 293    for (i = 0; i < n; i++) {
 294        uint64_t *cnt;
 295        struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, i);
 296        cnt = find_counter(insn);
 297
 298        if (cnt) {
 299            if (do_inline) {
 300                qemu_plugin_register_vcpu_insn_exec_inline(
 301                    insn, QEMU_PLUGIN_INLINE_ADD_U64, cnt, 1);
 302            } else {
 303                qemu_plugin_register_vcpu_insn_exec_cb(
 304                    insn, vcpu_insn_exec_before, QEMU_PLUGIN_CB_NO_REGS, cnt);
 305            }
 306        }
 307    }
 308}
 309
 310QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
 311                                           const qemu_info_t *info,
 312                                           int argc, char **argv)
 313{
 314    int i;
 315
 316    /* Select a class table appropriate to the guest architecture */
 317    for (i = 0; i < ARRAY_SIZE(class_tables); i++) {
 318        ClassSelector *entry = &class_tables[i];
 319        if (!entry->qemu_target ||
 320            strcmp(entry->qemu_target, info->target_name) == 0) {
 321            class_table = entry->table;
 322            class_table_sz = entry->table_sz;
 323            break;
 324        }
 325    }
 326
 327    for (i = 0; i < argc; i++) {
 328        char *p = argv[i];
 329        if (strcmp(p, "inline") == 0) {
 330            do_inline = true;
 331        } else if (strcmp(p, "verbose") == 0) {
 332            verbose = true;
 333        } else {
 334            int j;
 335            CountType type = COUNT_INDIVIDUAL;
 336            if (*p == '!') {
 337                type = COUNT_NONE;
 338                p++;
 339            }
 340            for (j = 0; j < class_table_sz; j++) {
 341                if (strcmp(p, class_table[j].opt) == 0) {
 342                    class_table[j].what = type;
 343                    break;
 344                }
 345            }
 346        }
 347    }
 348
 349    plugin_init();
 350
 351    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
 352    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
 353    return 0;
 354}
 355