qemu/contrib/plugins/lockstep.c
<<
>>
Prefs
   1/*
   2 * Lockstep Execution Plugin
   3 *
   4 * Allows you to execute two QEMU instances in lockstep and report
   5 * when their execution diverges. This is mainly useful for developers
   6 * who want to see where a change to TCG code generation has
   7 * introduced a subtle and hard to find bug.
   8 *
   9 * Caveats:
  10 *   - single-threaded linux-user apps only with non-deterministic syscalls
  11 *   - no MTTCG enabled system emulation (icount may help)
  12 *
  13 * While icount makes things more deterministic it doesn't mean a
  14 * particular run may execute the exact same sequence of blocks. An
  15 * asynchronous event (for example X11 graphics update) may cause a
  16 * block to end early and a new partial block to start. This means
  17 * serial only test cases are a better bet. -d nochain may also help.
  18 *
  19 * This code is not thread safe!
  20 *
  21 * Copyright (c) 2020 Linaro Ltd
  22 *
  23 * SPDX-License-Identifier: GPL-2.0-or-later
  24 */
  25
  26#include <glib.h>
  27#include <inttypes.h>
  28#include <unistd.h>
  29#include <sys/socket.h>
  30#include <sys/un.h>
  31#include <stdio.h>
  32#include <errno.h>
  33
  34#include <qemu-plugin.h>
  35
  36QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION; /* exported symbol carrying the plugin API version this file was built with */
  37
  38/* plugin id passed to qemu_plugin_install(); saved so we can uninstall later */
  39static qemu_plugin_id_t our_id;
  40
  41static unsigned long bb_count;     /* blocks recorded so far; incremented in vcpu_tb_exec() */
  42static unsigned long insn_count;   /* sum of the insns of every recorded block; see vcpu_tb_exec() */
  43
  44/* Information about a translated block */
  45typedef struct {
  46    uint64_t pc;      /* result of qemu_plugin_tb_vaddr() for the block */
  47    uint64_t insns;   /* result of qemu_plugin_tb_n_insns() for the block */
  48} BlockInfo;
  49
  50/* Information about an execution state in the log */
  51typedef struct {
  52    BlockInfo *block;            /* block that was executed; the record itself lives on the 'blocks' list */
  53    unsigned long insn_count;    /* value of the global insn_count after adding this block */
  54    unsigned long block_count;   /* value of the global bb_count after adding this block */
  55} ExecInfo;
  56
  57/* The execution state we compare */
  58typedef struct {
  59    uint64_t pc;                /* pc of the block about to execute */
  60    unsigned long insn_count;   /* global insn_count before the block is added */
  61} ExecState;                  /* written to and read from the socket as raw bytes in vcpu_tb_exec() */
  62
/* One record per detected divergence; stored on divergence_log, newest first */
  63typedef struct {
  64    GSList *log_pos;   /* head of 'log' at the moment the divergence was reported */
  65    int distance;      /* log entries walked from the head of 'log' towards the previous record's log_pos */
  66} DivergeState;
  67
  68/* list of translated block info */
  69static GSList *blocks;   /* BlockInfo records; one is prepended per call to vcpu_tb_trans() */
  70
  71/* execution log (ExecInfo, newest first) and points of divergence (DivergeState, newest first) */
  72static GSList *log, *divergence_log;
  73
  74static int socket_fd;          /* connected socket used to exchange ExecState with the peer */
  75static char *path_to_unlink;   /* set by setup_socket() after bind(); unlinked in plugin_cleanup() */
  76
  77static bool verbose;   /* set from the "verbose" argument; makes report_divergance() log every divergence */
  78
  79static void plugin_cleanup(qemu_plugin_id_t id)
  80{
  81    /* Free our block data */
  82    g_slist_free_full(blocks, &g_free);
  83    g_slist_free_full(log, &g_free);
  84    g_slist_free(divergence_log);
  85
  86    close(socket_fd);
  87    if (path_to_unlink) {
  88        unlink(path_to_unlink);
  89    }
  90}
  91
  92static void plugin_exit(qemu_plugin_id_t id, void *p)
  93{
  94    g_autoptr(GString) out = g_string_new("No divergence :-)\n");
  95    g_string_append_printf(out, "Executed %ld/%d blocks\n",
  96                           bb_count, g_slist_length(log));
  97    g_string_append_printf(out, "Executed ~%ld instructions\n", insn_count);
  98    qemu_plugin_outs(out->str);
  99
 100    plugin_cleanup(id);
 101}
 102
 103static void report_divergance(ExecState *us, ExecState *them)
 104{
 105    DivergeState divrec = { log, 0 };
 106    g_autoptr(GString) out = g_string_new("");
 107    bool diverged = false;
 108
 109    /*
 110     * If we have diverged before did we get back on track or are we
 111     * totally loosing it?
 112     */
 113    if (divergence_log) {
 114        DivergeState *last = (DivergeState *) divergence_log->data;
 115        GSList *entry;
 116
 117        for (entry = log; g_slist_next(entry); entry = g_slist_next(entry)) {
 118            if (entry == last->log_pos) {
 119                break;
 120            }
 121            divrec.distance++;
 122        }
 123
 124        /*
 125         * If the last two records are so close it is likely we will
 126         * not recover synchronisation with the other end.
 127         */
 128        if (divrec.distance == 1 && last->distance == 1) {
 129            diverged = true;
 130        }
 131    }
 132    divergence_log = g_slist_prepend(divergence_log,
 133                                     g_memdup(&divrec, sizeof(divrec)));
 134
 135    /* Output short log entry of going out of sync... */
 136    if (verbose || divrec.distance == 1 || diverged) {
 137        g_string_printf(out, "@ 0x%016lx vs 0x%016lx (%d/%d since last)\n",
 138                        us->pc, them->pc, g_slist_length(divergence_log),
 139                        divrec.distance);
 140        qemu_plugin_outs(out->str);
 141    }
 142
 143    if (diverged) {
 144        int i;
 145        GSList *entry;
 146
 147        g_string_printf(out, "Δ insn_count @ 0x%016lx (%ld) vs 0x%016lx (%ld)\n",
 148                        us->pc, us->insn_count, them->pc, them->insn_count);
 149
 150        for (entry = log, i = 0;
 151             g_slist_next(entry) && i < 5;
 152             entry = g_slist_next(entry), i++) {
 153            ExecInfo *prev = (ExecInfo *) entry->data;
 154            g_string_append_printf(out,
 155                                   "  previously @ 0x%016lx/%ld (%ld insns)\n",
 156                                   prev->block->pc, prev->block->insns,
 157                                   prev->insn_count);
 158        }
 159        qemu_plugin_outs(out->str);
 160        qemu_plugin_outs("too much divergence... giving up.");
 161        qemu_plugin_uninstall(our_id, plugin_cleanup);
 162    }
 163}
 164
 165static void vcpu_tb_exec(unsigned int cpu_index, void *udata)
 166{
 167    BlockInfo *bi = (BlockInfo *) udata;
 168    ExecState us, them;
 169    ssize_t bytes;
 170    ExecInfo *exec;
 171
 172    us.pc = bi->pc;
 173    us.insn_count = insn_count;
 174
 175    /*
 176     * Write our current position to the other end. If we fail the
 177     * other end has probably died and we should shut down gracefully.
 178     */
 179    bytes = write(socket_fd, &us, sizeof(ExecState));
 180    if (bytes < sizeof(ExecState)) {
 181        qemu_plugin_outs(bytes < 0 ?
 182                         "problem writing to socket" :
 183                         "wrote less than expected to socket");
 184        qemu_plugin_uninstall(our_id, plugin_cleanup);
 185        return;
 186    }
 187
 188    /*
 189     * Now read where our peer has reached. Again a failure probably
 190     * indicates the other end died and we should close down cleanly.
 191     */
 192    bytes = read(socket_fd, &them, sizeof(ExecState));
 193    if (bytes < sizeof(ExecState)) {
 194        qemu_plugin_outs(bytes < 0 ?
 195                         "problem reading from socket" :
 196                         "read less than expected");
 197        qemu_plugin_uninstall(our_id, plugin_cleanup);
 198        return;
 199    }
 200
 201    /*
 202     * Compare and report if we have diverged.
 203     */
 204    if (us.pc != them.pc) {
 205        report_divergance(&us, &them);
 206    }
 207
 208    /*
 209     * Assume this block will execute fully and record it
 210     * in the execution log.
 211     */
 212    insn_count += bi->insns;
 213    bb_count++;
 214    exec = g_new0(ExecInfo, 1);
 215    exec->block = bi;
 216    exec->insn_count = insn_count;
 217    exec->block_count = bb_count;
 218    log = g_slist_prepend(log, exec);
 219}
 220
 221static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
 222{
 223    BlockInfo *bi = g_new0(BlockInfo, 1);
 224    bi->pc = qemu_plugin_tb_vaddr(tb);
 225    bi->insns = qemu_plugin_tb_n_insns(tb);
 226
 227    /* save a reference so we can free later */
 228    blocks = g_slist_prepend(blocks, bi);
 229    qemu_plugin_register_vcpu_tb_exec_cb(tb, vcpu_tb_exec,
 230                                         QEMU_PLUGIN_CB_NO_REGS, (void *)bi);
 231}
 232
 233
 234/*
 235 * Instead of encoding master/slave status into what is essentially
 236 * two peers we shall just take the simple approach of checking for
 237 * the existence of the socket and assuming if it's not there we are
 238 * the first process.
 239 */
 240static bool setup_socket(const char *path)
 241{
 242    struct sockaddr_un sockaddr;
 243    int fd;
 244
 245    fd = socket(AF_UNIX, SOCK_STREAM, 0);
 246    if (fd < 0) {
 247        perror("create socket");
 248        return false;
 249    }
 250
 251    sockaddr.sun_family = AF_UNIX;
 252    g_strlcpy(sockaddr.sun_path, path, sizeof(sockaddr.sun_path) - 1);
 253    if (bind(fd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0) {
 254        perror("bind socket");
 255        close(fd);
 256        return false;
 257    }
 258
 259    /* remember to clean-up */
 260    path_to_unlink = g_strdup(path);
 261
 262    if (listen(fd, 1) < 0) {
 263        perror("listen socket");
 264        close(fd);
 265        return false;
 266    }
 267
 268    socket_fd = accept(fd, NULL, NULL);
 269    if (socket_fd < 0 && errno != EINTR) {
 270        perror("accept socket");
 271        close(fd);
 272        return false;
 273    }
 274
 275    qemu_plugin_outs("setup_socket::ready\n");
 276
 277    close(fd);
 278    return true;
 279}
 280
 281static bool connect_socket(const char *path)
 282{
 283    int fd;
 284    struct sockaddr_un sockaddr;
 285
 286    fd = socket(AF_UNIX, SOCK_STREAM, 0);
 287    if (fd < 0) {
 288        perror("create socket");
 289        return false;
 290    }
 291
 292    sockaddr.sun_family = AF_UNIX;
 293    g_strlcpy(sockaddr.sun_path, path, sizeof(sockaddr.sun_path) - 1);
 294
 295    if (connect(fd, (struct sockaddr *)&sockaddr, sizeof(sockaddr)) < 0) {
 296        perror("failed to connect");
 297        close(fd);
 298        return false;
 299    }
 300
 301    qemu_plugin_outs("connect_socket::ready\n");
 302
 303    socket_fd = fd;
 304    return true;
 305}
 306
 307static bool setup_unix_socket(const char *path)
 308{
 309    if (g_file_test(path, G_FILE_TEST_EXISTS)) {
 310        return connect_socket(path);
 311    } else {
 312        return setup_socket(path);
 313    }
 314}
 315
 316
 317QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
 318                                           const qemu_info_t *info,
 319                                           int argc, char **argv)
 320{
 321    int i;
 322    g_autofree char *sock_path = NULL;
 323
 324    for (i = 0; i < argc; i++) {
 325        char *p = argv[i];
 326        g_autofree char **tokens = g_strsplit(p, "=", 2);
 327
 328        if (g_strcmp0(tokens[0], "verbose") == 0) {
 329            if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &verbose)) {
 330                fprintf(stderr, "boolean argument parsing failed: %s\n", p);
 331                return -1;
 332            }
 333        } else if (g_strcmp0(tokens[0], "sockpath") == 0) {
 334            sock_path = tokens[1];
 335        } else {
 336            fprintf(stderr, "option parsing failed: %s\n", p);
 337            return -1;
 338        }
 339    }
 340
 341    if (sock_path == NULL) {
 342        fprintf(stderr, "Need a socket path to talk to other instance.\n");
 343        return -1;
 344    }
 345
 346    if (!setup_unix_socket(sock_path)) {
 347        fprintf(stderr, "Failed to setup socket for communications.\n");
 348        return -1;
 349    }
 350
 351    our_id = id;
 352
 353    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
 354    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
 355    return 0;
 356}
 357