linux/tools/perf/builtin-kvm.c
<<
>>
Prefs
   1#include "builtin.h"
   2#include "perf.h"
   3
   4#include "util/evsel.h"
   5#include "util/evlist.h"
   6#include "util/util.h"
   7#include "util/cache.h"
   8#include "util/symbol.h"
   9#include "util/thread.h"
  10#include "util/header.h"
  11#include "util/session.h"
  12#include "util/intlist.h"
  13#include <subcmd/parse-options.h>
  14#include "util/trace-event.h"
  15#include "util/debug.h"
  16#include "util/tool.h"
  17#include "util/stat.h"
  18#include "util/top.h"
  19#include "util/data.h"
  20#include "util/ordered-events.h"
  21
  22#include <sys/prctl.h>
  23#ifdef HAVE_TIMERFD_SUPPORT
  24#include <sys/timerfd.h>
  25#endif
  26
  27#include <linux/time64.h>
  28#include <termios.h>
  29#include <semaphore.h>
  30#include <pthread.h>
  31#include <math.h>
  32
  33#ifdef HAVE_KVM_STAT_SUPPORT
  34#include "util/kvm-stat.h"
  35
  36void exit_event_get_key(struct perf_evsel *evsel,
  37                        struct perf_sample *sample,
  38                        struct event_key *key)
  39{
  40        key->info = 0;
  41        key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
  42}
  43
  44bool kvm_exit_event(struct perf_evsel *evsel)
  45{
  46        return !strcmp(evsel->name, kvm_exit_trace);
  47}
  48
  49bool exit_event_begin(struct perf_evsel *evsel,
  50                      struct perf_sample *sample, struct event_key *key)
  51{
  52        if (kvm_exit_event(evsel)) {
  53                exit_event_get_key(evsel, sample, key);
  54                return true;
  55        }
  56
  57        return false;
  58}
  59
  60bool kvm_entry_event(struct perf_evsel *evsel)
  61{
  62        return !strcmp(evsel->name, kvm_entry_trace);
  63}
  64
  65bool exit_event_end(struct perf_evsel *evsel,
  66                    struct perf_sample *sample __maybe_unused,
  67                    struct event_key *key __maybe_unused)
  68{
  69        return kvm_entry_event(evsel);
  70}
  71
  72static const char *get_exit_reason(struct perf_kvm_stat *kvm,
  73                                   struct exit_reasons_table *tbl,
  74                                   u64 exit_code)
  75{
  76        while (tbl->reason != NULL) {
  77                if (tbl->exit_code == exit_code)
  78                        return tbl->reason;
  79                tbl++;
  80        }
  81
  82        pr_err("unknown kvm exit code:%lld on %s\n",
  83                (unsigned long long)exit_code, kvm->exit_reasons_isa);
  84        return "UNKNOWN";
  85}
  86
  87void exit_event_decode_key(struct perf_kvm_stat *kvm,
  88                           struct event_key *key,
  89                           char *decode)
  90{
  91        const char *exit_reason = get_exit_reason(kvm, key->exit_reasons,
  92                                                  key->key);
  93
  94        scnprintf(decode, decode_str_len, "%s", exit_reason);
  95}
  96
  97static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
  98{
  99        struct kvm_reg_events_ops *events_ops = kvm_reg_events_ops;
 100
 101        for (events_ops = kvm_reg_events_ops; events_ops->name; events_ops++) {
 102                if (!strcmp(events_ops->name, kvm->report_event)) {
 103                        kvm->events_ops = events_ops->ops;
 104                        return true;
 105                }
 106        }
 107
 108        return false;
 109}
 110
 111struct vcpu_event_record {
 112        int vcpu_id;
 113        u64 start_time;
 114        struct kvm_event *last_event;
 115};
 116
 117
 118static void init_kvm_event_record(struct perf_kvm_stat *kvm)
 119{
 120        unsigned int i;
 121
 122        for (i = 0; i < EVENTS_CACHE_SIZE; i++)
 123                INIT_LIST_HEAD(&kvm->kvm_events_cache[i]);
 124}
 125
 126#ifdef HAVE_TIMERFD_SUPPORT
 127static void clear_events_cache_stats(struct list_head *kvm_events_cache)
 128{
 129        struct list_head *head;
 130        struct kvm_event *event;
 131        unsigned int i;
 132        int j;
 133
 134        for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
 135                head = &kvm_events_cache[i];
 136                list_for_each_entry(event, head, hash_entry) {
 137                        /* reset stats for event */
 138                        event->total.time = 0;
 139                        init_stats(&event->total.stats);
 140
 141                        for (j = 0; j < event->max_vcpu; ++j) {
 142                                event->vcpu[j].time = 0;
 143                                init_stats(&event->vcpu[j].stats);
 144                        }
 145                }
 146        }
 147}
 148#endif
 149
 150static int kvm_events_hash_fn(u64 key)
 151{
 152        return key & (EVENTS_CACHE_SIZE - 1);
 153}
 154
 155static bool kvm_event_expand(struct kvm_event *event, int vcpu_id)
 156{
 157        int old_max_vcpu = event->max_vcpu;
 158        void *prev;
 159
 160        if (vcpu_id < event->max_vcpu)
 161                return true;
 162
 163        while (event->max_vcpu <= vcpu_id)
 164                event->max_vcpu += DEFAULT_VCPU_NUM;
 165
 166        prev = event->vcpu;
 167        event->vcpu = realloc(event->vcpu,
 168                              event->max_vcpu * sizeof(*event->vcpu));
 169        if (!event->vcpu) {
 170                free(prev);
 171                pr_err("Not enough memory\n");
 172                return false;
 173        }
 174
 175        memset(event->vcpu + old_max_vcpu, 0,
 176               (event->max_vcpu - old_max_vcpu) * sizeof(*event->vcpu));
 177        return true;
 178}
 179
 180static struct kvm_event *kvm_alloc_init_event(struct event_key *key)
 181{
 182        struct kvm_event *event;
 183
 184        event = zalloc(sizeof(*event));
 185        if (!event) {
 186                pr_err("Not enough memory\n");
 187                return NULL;
 188        }
 189
 190        event->key = *key;
 191        init_stats(&event->total.stats);
 192        return event;
 193}
 194
 195static struct kvm_event *find_create_kvm_event(struct perf_kvm_stat *kvm,
 196                                               struct event_key *key)
 197{
 198        struct kvm_event *event;
 199        struct list_head *head;
 200
 201        BUG_ON(key->key == INVALID_KEY);
 202
 203        head = &kvm->kvm_events_cache[kvm_events_hash_fn(key->key)];
 204        list_for_each_entry(event, head, hash_entry) {
 205                if (event->key.key == key->key && event->key.info == key->info)
 206                        return event;
 207        }
 208
 209        event = kvm_alloc_init_event(key);
 210        if (!event)
 211                return NULL;
 212
 213        list_add(&event->hash_entry, head);
 214        return event;
 215}
 216
 217static bool handle_begin_event(struct perf_kvm_stat *kvm,
 218                               struct vcpu_event_record *vcpu_record,
 219                               struct event_key *key, u64 timestamp)
 220{
 221        struct kvm_event *event = NULL;
 222
 223        if (key->key != INVALID_KEY)
 224                event = find_create_kvm_event(kvm, key);
 225
 226        vcpu_record->last_event = event;
 227        vcpu_record->start_time = timestamp;
 228        return true;
 229}
 230
 231static void
 232kvm_update_event_stats(struct kvm_event_stats *kvm_stats, u64 time_diff)
 233{
 234        kvm_stats->time += time_diff;
 235        update_stats(&kvm_stats->stats, time_diff);
 236}
 237
 238static double kvm_event_rel_stddev(int vcpu_id, struct kvm_event *event)
 239{
 240        struct kvm_event_stats *kvm_stats = &event->total;
 241
 242        if (vcpu_id != -1)
 243                kvm_stats = &event->vcpu[vcpu_id];
 244
 245        return rel_stddev_stats(stddev_stats(&kvm_stats->stats),
 246                                avg_stats(&kvm_stats->stats));
 247}
 248
 249static bool update_kvm_event(struct kvm_event *event, int vcpu_id,
 250                             u64 time_diff)
 251{
 252        if (vcpu_id == -1) {
 253                kvm_update_event_stats(&event->total, time_diff);
 254                return true;
 255        }
 256
 257        if (!kvm_event_expand(event, vcpu_id))
 258                return false;
 259
 260        kvm_update_event_stats(&event->vcpu[vcpu_id], time_diff);
 261        return true;
 262}
 263
 264static bool is_child_event(struct perf_kvm_stat *kvm,
 265                           struct perf_evsel *evsel,
 266                           struct perf_sample *sample,
 267                           struct event_key *key)
 268{
 269        struct child_event_ops *child_ops;
 270
 271        child_ops = kvm->events_ops->child_ops;
 272
 273        if (!child_ops)
 274                return false;
 275
 276        for (; child_ops->name; child_ops++) {
 277                if (!strcmp(evsel->name, child_ops->name)) {
 278                        child_ops->get_key(evsel, sample, key);
 279                        return true;
 280                }
 281        }
 282
 283        return false;
 284}
 285
 286static bool handle_child_event(struct perf_kvm_stat *kvm,
 287                               struct vcpu_event_record *vcpu_record,
 288                               struct event_key *key,
 289                               struct perf_sample *sample __maybe_unused)
 290{
 291        struct kvm_event *event = NULL;
 292
 293        if (key->key != INVALID_KEY)
 294                event = find_create_kvm_event(kvm, key);
 295
 296        vcpu_record->last_event = event;
 297
 298        return true;
 299}
 300
 301static bool skip_event(const char *event)
 302{
 303        const char * const *skip_events;
 304
 305        for (skip_events = kvm_skip_events; *skip_events; skip_events++)
 306                if (!strcmp(event, *skip_events))
 307                        return true;
 308
 309        return false;
 310}
 311
 312static bool handle_end_event(struct perf_kvm_stat *kvm,
 313                             struct vcpu_event_record *vcpu_record,
 314                             struct event_key *key,
 315                             struct perf_sample *sample)
 316{
 317        struct kvm_event *event;
 318        u64 time_begin, time_diff;
 319        int vcpu;
 320
 321        if (kvm->trace_vcpu == -1)
 322                vcpu = -1;
 323        else
 324                vcpu = vcpu_record->vcpu_id;
 325
 326        event = vcpu_record->last_event;
 327        time_begin = vcpu_record->start_time;
 328
 329        /* The begin event is not caught. */
 330        if (!time_begin)
 331                return true;
 332
 333        /*
 334         * In some case, the 'begin event' only records the start timestamp,
 335         * the actual event is recognized in the 'end event' (e.g. mmio-event).
 336         */
 337
 338        /* Both begin and end events did not get the key. */
 339        if (!event && key->key == INVALID_KEY)
 340                return true;
 341
 342        if (!event)
 343                event = find_create_kvm_event(kvm, key);
 344
 345        if (!event)
 346                return false;
 347
 348        vcpu_record->last_event = NULL;
 349        vcpu_record->start_time = 0;
 350
 351        /* seems to happen once in a while during live mode */
 352        if (sample->time < time_begin) {
 353                pr_debug("End time before begin time; skipping event.\n");
 354                return true;
 355        }
 356
 357        time_diff = sample->time - time_begin;
 358
 359        if (kvm->duration && time_diff > kvm->duration) {
 360                char decode[decode_str_len];
 361
 362                kvm->events_ops->decode_key(kvm, &event->key, decode);
 363                if (!skip_event(decode)) {
 364                        pr_info("%" PRIu64 " VM %d, vcpu %d: %s event took %" PRIu64 "usec\n",
 365                                 sample->time, sample->pid, vcpu_record->vcpu_id,
 366                                 decode, time_diff / NSEC_PER_USEC);
 367                }
 368        }
 369
 370        return update_kvm_event(event, vcpu, time_diff);
 371}
 372
 373static
 374struct vcpu_event_record *per_vcpu_record(struct thread *thread,
 375                                          struct perf_evsel *evsel,
 376                                          struct perf_sample *sample)
 377{
 378        /* Only kvm_entry records vcpu id. */
 379        if (!thread__priv(thread) && kvm_entry_event(evsel)) {
 380                struct vcpu_event_record *vcpu_record;
 381
 382                vcpu_record = zalloc(sizeof(*vcpu_record));
 383                if (!vcpu_record) {
 384                        pr_err("%s: Not enough memory\n", __func__);
 385                        return NULL;
 386                }
 387
 388                vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample,
 389                                                          vcpu_id_str);
 390                thread__set_priv(thread, vcpu_record);
 391        }
 392
 393        return thread__priv(thread);
 394}
 395
 396static bool handle_kvm_event(struct perf_kvm_stat *kvm,
 397                             struct thread *thread,
 398                             struct perf_evsel *evsel,
 399                             struct perf_sample *sample)
 400{
 401        struct vcpu_event_record *vcpu_record;
 402        struct event_key key = { .key = INVALID_KEY,
 403                                 .exit_reasons = kvm->exit_reasons };
 404
 405        vcpu_record = per_vcpu_record(thread, evsel, sample);
 406        if (!vcpu_record)
 407                return true;
 408
 409        /* only process events for vcpus user cares about */
 410        if ((kvm->trace_vcpu != -1) &&
 411            (kvm->trace_vcpu != vcpu_record->vcpu_id))
 412                return true;
 413
 414        if (kvm->events_ops->is_begin_event(evsel, sample, &key))
 415                return handle_begin_event(kvm, vcpu_record, &key, sample->time);
 416
 417        if (is_child_event(kvm, evsel, sample, &key))
 418                return handle_child_event(kvm, vcpu_record, &key, sample);
 419
 420        if (kvm->events_ops->is_end_event(evsel, sample, &key))
 421                return handle_end_event(kvm, vcpu_record, &key, sample);
 422
 423        return true;
 424}
 425
 426#define GET_EVENT_KEY(func, field)                                      \
 427static u64 get_event_ ##func(struct kvm_event *event, int vcpu)         \
 428{                                                                       \
 429        if (vcpu == -1)                                                 \
 430                return event->total.field;                              \
 431                                                                        \
 432        if (vcpu >= event->max_vcpu)                                    \
 433                return 0;                                               \
 434                                                                        \
 435        return event->vcpu[vcpu].field;                                 \
 436}
 437
 438#define COMPARE_EVENT_KEY(func, field)                                  \
 439GET_EVENT_KEY(func, field)                                              \
 440static int compare_kvm_event_ ## func(struct kvm_event *one,            \
 441                                        struct kvm_event *two, int vcpu)\
 442{                                                                       \
 443        return get_event_ ##func(one, vcpu) >                           \
 444                                get_event_ ##func(two, vcpu);           \
 445}
 446
 447GET_EVENT_KEY(time, time);
 448COMPARE_EVENT_KEY(count, stats.n);
 449COMPARE_EVENT_KEY(mean, stats.mean);
 450GET_EVENT_KEY(max, stats.max);
 451GET_EVENT_KEY(min, stats.min);
 452
 453#define DEF_SORT_NAME_KEY(name, compare_key)                            \
 454        { #name, compare_kvm_event_ ## compare_key }
 455
 456static struct kvm_event_key keys[] = {
 457        DEF_SORT_NAME_KEY(sample, count),
 458        DEF_SORT_NAME_KEY(time, mean),
 459        { NULL, NULL }
 460};
 461
 462static bool select_key(struct perf_kvm_stat *kvm)
 463{
 464        int i;
 465
 466        for (i = 0; keys[i].name; i++) {
 467                if (!strcmp(keys[i].name, kvm->sort_key)) {
 468                        kvm->compare = keys[i].key;
 469                        return true;
 470                }
 471        }
 472
 473        pr_err("Unknown compare key:%s\n", kvm->sort_key);
 474        return false;
 475}
 476
 477static void insert_to_result(struct rb_root *result, struct kvm_event *event,
 478                             key_cmp_fun bigger, int vcpu)
 479{
 480        struct rb_node **rb = &result->rb_node;
 481        struct rb_node *parent = NULL;
 482        struct kvm_event *p;
 483
 484        while (*rb) {
 485                p = container_of(*rb, struct kvm_event, rb);
 486                parent = *rb;
 487
 488                if (bigger(event, p, vcpu))
 489                        rb = &(*rb)->rb_left;
 490                else
 491                        rb = &(*rb)->rb_right;
 492        }
 493
 494        rb_link_node(&event->rb, parent, rb);
 495        rb_insert_color(&event->rb, result);
 496}
 497
 498static void
 499update_total_count(struct perf_kvm_stat *kvm, struct kvm_event *event)
 500{
 501        int vcpu = kvm->trace_vcpu;
 502
 503        kvm->total_count += get_event_count(event, vcpu);
 504        kvm->total_time += get_event_time(event, vcpu);
 505}
 506
 507static bool event_is_valid(struct kvm_event *event, int vcpu)
 508{
 509        return !!get_event_count(event, vcpu);
 510}
 511
 512static void sort_result(struct perf_kvm_stat *kvm)
 513{
 514        unsigned int i;
 515        int vcpu = kvm->trace_vcpu;
 516        struct kvm_event *event;
 517
 518        for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
 519                list_for_each_entry(event, &kvm->kvm_events_cache[i], hash_entry) {
 520                        if (event_is_valid(event, vcpu)) {
 521                                update_total_count(kvm, event);
 522                                insert_to_result(&kvm->result, event,
 523                                                 kvm->compare, vcpu);
 524                        }
 525                }
 526        }
 527}
 528
 529/* returns left most element of result, and erase it */
 530static struct kvm_event *pop_from_result(struct rb_root *result)
 531{
 532        struct rb_node *node = rb_first(result);
 533
 534        if (!node)
 535                return NULL;
 536
 537        rb_erase(node, result);
 538        return container_of(node, struct kvm_event, rb);
 539}
 540
 541static void print_vcpu_info(struct perf_kvm_stat *kvm)
 542{
 543        int vcpu = kvm->trace_vcpu;
 544
 545        pr_info("Analyze events for ");
 546
 547        if (kvm->opts.target.system_wide)
 548                pr_info("all VMs, ");
 549        else if (kvm->opts.target.pid)
 550                pr_info("pid(s) %s, ", kvm->opts.target.pid);
 551        else
 552                pr_info("dazed and confused on what is monitored, ");
 553
 554        if (vcpu == -1)
 555                pr_info("all VCPUs:\n\n");
 556        else
 557                pr_info("VCPU %d:\n\n", vcpu);
 558}
 559
 560static void show_timeofday(void)
 561{
 562        char date[64];
 563        struct timeval tv;
 564        struct tm ltime;
 565
 566        gettimeofday(&tv, NULL);
 567        if (localtime_r(&tv.tv_sec, &ltime)) {
 568                strftime(date, sizeof(date), "%H:%M:%S", &ltime);
 569                pr_info("%s.%06ld", date, tv.tv_usec);
 570        } else
 571                pr_info("00:00:00.000000");
 572
 573        return;
 574}
 575
 576static void print_result(struct perf_kvm_stat *kvm)
 577{
 578        char decode[decode_str_len];
 579        struct kvm_event *event;
 580        int vcpu = kvm->trace_vcpu;
 581
 582        if (kvm->live) {
 583                puts(CONSOLE_CLEAR);
 584                show_timeofday();
 585        }
 586
 587        pr_info("\n\n");
 588        print_vcpu_info(kvm);
 589        pr_info("%*s ", decode_str_len, kvm->events_ops->name);
 590        pr_info("%10s ", "Samples");
 591        pr_info("%9s ", "Samples%");
 592
 593        pr_info("%9s ", "Time%");
 594        pr_info("%11s ", "Min Time");
 595        pr_info("%11s ", "Max Time");
 596        pr_info("%16s ", "Avg time");
 597        pr_info("\n\n");
 598
 599        while ((event = pop_from_result(&kvm->result))) {
 600                u64 ecount, etime, max, min;
 601
 602                ecount = get_event_count(event, vcpu);
 603                etime = get_event_time(event, vcpu);
 604                max = get_event_max(event, vcpu);
 605                min = get_event_min(event, vcpu);
 606
 607                kvm->events_ops->decode_key(kvm, &event->key, decode);
 608                pr_info("%*s ", decode_str_len, decode);
 609                pr_info("%10llu ", (unsigned long long)ecount);
 610                pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
 611                pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
 612                pr_info("%9.2fus ", (double)min / NSEC_PER_USEC);
 613                pr_info("%9.2fus ", (double)max / NSEC_PER_USEC);
 614                pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount / NSEC_PER_USEC,
 615                        kvm_event_rel_stddev(vcpu, event));
 616                pr_info("\n");
 617        }
 618
 619        pr_info("\nTotal Samples:%" PRIu64 ", Total events handled time:%.2fus.\n\n",
 620                kvm->total_count, kvm->total_time / (double)NSEC_PER_USEC);
 621
 622        if (kvm->lost_events)
 623                pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events);
 624}
 625
 626#ifdef HAVE_TIMERFD_SUPPORT
 627static int process_lost_event(struct perf_tool *tool,
 628                              union perf_event *event __maybe_unused,
 629                              struct perf_sample *sample __maybe_unused,
 630                              struct machine *machine __maybe_unused)
 631{
 632        struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat, tool);
 633
 634        kvm->lost_events++;
 635        return 0;
 636}
 637#endif
 638
 639static bool skip_sample(struct perf_kvm_stat *kvm,
 640                        struct perf_sample *sample)
 641{
 642        if (kvm->pid_list && intlist__find(kvm->pid_list, sample->pid) == NULL)
 643                return true;
 644
 645        return false;
 646}
 647
 648static int process_sample_event(struct perf_tool *tool,
 649                                union perf_event *event,
 650                                struct perf_sample *sample,
 651                                struct perf_evsel *evsel,
 652                                struct machine *machine)
 653{
 654        int err = 0;
 655        struct thread *thread;
 656        struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat,
 657                                                 tool);
 658
 659        if (skip_sample(kvm, sample))
 660                return 0;
 661
 662        thread = machine__findnew_thread(machine, sample->pid, sample->tid);
 663        if (thread == NULL) {
 664                pr_debug("problem processing %d event, skipping it.\n",
 665                        event->header.type);
 666                return -1;
 667        }
 668
 669        if (!handle_kvm_event(kvm, thread, evsel, sample))
 670                err = -1;
 671
 672        thread__put(thread);
 673        return err;
 674}
 675
 676static int cpu_isa_config(struct perf_kvm_stat *kvm)
 677{
 678        char buf[64], *cpuid;
 679        int err;
 680
 681        if (kvm->live) {
 682                err = get_cpuid(buf, sizeof(buf));
 683                if (err != 0) {
 684                        pr_err("Failed to look up CPU type\n");
 685                        return err;
 686                }
 687                cpuid = buf;
 688        } else
 689                cpuid = kvm->session->header.env.cpuid;
 690
 691        if (!cpuid) {
 692                pr_err("Failed to look up CPU type\n");
 693                return -EINVAL;
 694        }
 695
 696        err = cpu_isa_init(kvm, cpuid);
 697        if (err == -ENOTSUP)
 698                pr_err("CPU %s is not supported.\n", cpuid);
 699
 700        return err;
 701}
 702
 703static bool verify_vcpu(int vcpu)
 704{
 705        if (vcpu != -1 && vcpu < 0) {
 706                pr_err("Invalid vcpu:%d.\n", vcpu);
 707                return false;
 708        }
 709
 710        return true;
 711}
 712
 713#ifdef HAVE_TIMERFD_SUPPORT
 714/* keeping the max events to a modest level to keep
 715 * the processing of samples per mmap smooth.
 716 */
 717#define PERF_KVM__MAX_EVENTS_PER_MMAP  25
 718
 719static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
 720                                   u64 *mmap_time)
 721{
 722        union perf_event *event;
 723        struct perf_sample sample;
 724        s64 n = 0;
 725        int err;
 726
 727        *mmap_time = ULLONG_MAX;
 728        while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) {
 729                err = perf_evlist__parse_sample(kvm->evlist, event, &sample);
 730                if (err) {
 731                        perf_evlist__mmap_consume(kvm->evlist, idx);
 732                        pr_err("Failed to parse sample\n");
 733                        return -1;
 734                }
 735
 736                err = perf_session__queue_event(kvm->session, event, &sample, 0);
 737                /*
 738                 * FIXME: Here we can't consume the event, as perf_session__queue_event will
 739                 *        point to it, and it'll get possibly overwritten by the kernel.
 740                 */
 741                perf_evlist__mmap_consume(kvm->evlist, idx);
 742
 743                if (err) {
 744                        pr_err("Failed to enqueue sample: %d\n", err);
 745                        return -1;
 746                }
 747
 748                /* save time stamp of our first sample for this mmap */
 749                if (n == 0)
 750                        *mmap_time = sample.time;
 751
 752                /* limit events per mmap handled all at once */
 753                n++;
 754                if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
 755                        break;
 756        }
 757
 758        return n;
 759}
 760
 761static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm)
 762{
 763        int i, err, throttled = 0;
 764        s64 n, ntotal = 0;
 765        u64 flush_time = ULLONG_MAX, mmap_time;
 766
 767        for (i = 0; i < kvm->evlist->nr_mmaps; i++) {
 768                n = perf_kvm__mmap_read_idx(kvm, i, &mmap_time);
 769                if (n < 0)
 770                        return -1;
 771
 772                /* flush time is going to be the minimum of all the individual
 773                 * mmap times. Essentially, we flush all the samples queued up
 774                 * from the last pass under our minimal start time -- that leaves
 775                 * a very small race for samples to come in with a lower timestamp.
 776                 * The ioctl to return the perf_clock timestamp should close the
 777                 * race entirely.
 778                 */
 779                if (mmap_time < flush_time)
 780                        flush_time = mmap_time;
 781
 782                ntotal += n;
 783                if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
 784                        throttled = 1;
 785        }
 786
 787        /* flush queue after each round in which we processed events */
 788        if (ntotal) {
 789                struct ordered_events *oe = &kvm->session->ordered_events;
 790
 791                oe->next_flush = flush_time;
 792                err = ordered_events__flush(oe, OE_FLUSH__ROUND);
 793                if (err) {
 794                        if (kvm->lost_events)
 795                                pr_info("\nLost events: %" PRIu64 "\n\n",
 796                                        kvm->lost_events);
 797                        return err;
 798                }
 799        }
 800
 801        return throttled;
 802}
 803
 804static volatile int done;
 805
 806static void sig_handler(int sig __maybe_unused)
 807{
 808        done = 1;
 809}
 810
 811static int perf_kvm__timerfd_create(struct perf_kvm_stat *kvm)
 812{
 813        struct itimerspec new_value;
 814        int rc = -1;
 815
 816        kvm->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
 817        if (kvm->timerfd < 0) {
 818                pr_err("timerfd_create failed\n");
 819                goto out;
 820        }
 821
 822        new_value.it_value.tv_sec = kvm->display_time;
 823        new_value.it_value.tv_nsec = 0;
 824        new_value.it_interval.tv_sec = kvm->display_time;
 825        new_value.it_interval.tv_nsec = 0;
 826
 827        if (timerfd_settime(kvm->timerfd, 0, &new_value, NULL) != 0) {
 828                pr_err("timerfd_settime failed: %d\n", errno);
 829                close(kvm->timerfd);
 830                goto out;
 831        }
 832
 833        rc = 0;
 834out:
 835        return rc;
 836}
 837
 838static int perf_kvm__handle_timerfd(struct perf_kvm_stat *kvm)
 839{
 840        uint64_t c;
 841        int rc;
 842
 843        rc = read(kvm->timerfd, &c, sizeof(uint64_t));
 844        if (rc < 0) {
 845                if (errno == EAGAIN)
 846                        return 0;
 847
 848                pr_err("Failed to read timer fd: %d\n", errno);
 849                return -1;
 850        }
 851
 852        if (rc != sizeof(uint64_t)) {
 853                pr_err("Error reading timer fd - invalid size returned\n");
 854                return -1;
 855        }
 856
 857        if (c != 1)
 858                pr_debug("Missed timer beats: %" PRIu64 "\n", c-1);
 859
 860        /* update display */
 861        sort_result(kvm);
 862        print_result(kvm);
 863
 864        /* reset counts */
 865        clear_events_cache_stats(kvm->kvm_events_cache);
 866        kvm->total_count = 0;
 867        kvm->total_time = 0;
 868        kvm->lost_events = 0;
 869
 870        return 0;
 871}
 872
 873static int fd_set_nonblock(int fd)
 874{
 875        long arg = 0;
 876
 877        arg = fcntl(fd, F_GETFL);
 878        if (arg < 0) {
 879                pr_err("Failed to get current flags for fd %d\n", fd);
 880                return -1;
 881        }
 882
 883        if (fcntl(fd, F_SETFL, arg | O_NONBLOCK) < 0) {
 884                pr_err("Failed to set non-block option on fd %d\n", fd);
 885                return -1;
 886        }
 887
 888        return 0;
 889}
 890
 891static int perf_kvm__handle_stdin(void)
 892{
 893        int c;
 894
 895        c = getc(stdin);
 896        if (c == 'q')
 897                return 1;
 898
 899        return 0;
 900}
 901
 902static int kvm_events_live_report(struct perf_kvm_stat *kvm)
 903{
 904        int nr_stdin, ret, err = -EINVAL;
 905        struct termios save;
 906
 907        /* live flag must be set first */
 908        kvm->live = true;
 909
 910        ret = cpu_isa_config(kvm);
 911        if (ret < 0)
 912                return ret;
 913
 914        if (!verify_vcpu(kvm->trace_vcpu) ||
 915            !select_key(kvm) ||
 916            !register_kvm_events_ops(kvm)) {
 917                goto out;
 918        }
 919
 920        set_term_quiet_input(&save);
 921        init_kvm_event_record(kvm);
 922
 923        signal(SIGINT, sig_handler);
 924        signal(SIGTERM, sig_handler);
 925
 926        /* add timer fd */
 927        if (perf_kvm__timerfd_create(kvm) < 0) {
 928                err = -1;
 929                goto out;
 930        }
 931
 932        if (perf_evlist__add_pollfd(kvm->evlist, kvm->timerfd) < 0)
 933                goto out;
 934
 935        nr_stdin = perf_evlist__add_pollfd(kvm->evlist, fileno(stdin));
 936        if (nr_stdin < 0)
 937                goto out;
 938
 939        if (fd_set_nonblock(fileno(stdin)) != 0)
 940                goto out;
 941
 942        /* everything is good - enable the events and process */
 943        perf_evlist__enable(kvm->evlist);
 944
 945        while (!done) {
 946                struct fdarray *fda = &kvm->evlist->pollfd;
 947                int rc;
 948
 949                rc = perf_kvm__mmap_read(kvm);
 950                if (rc < 0)
 951                        break;
 952
 953                err = perf_kvm__handle_timerfd(kvm);
 954                if (err)
 955                        goto out;
 956
 957                if (fda->entries[nr_stdin].revents & POLLIN)
 958                        done = perf_kvm__handle_stdin();
 959
 960                if (!rc && !done)
 961                        err = fdarray__poll(fda, 100);
 962        }
 963
 964        perf_evlist__disable(kvm->evlist);
 965
 966        if (err == 0) {
 967                sort_result(kvm);
 968                print_result(kvm);
 969        }
 970
 971out:
 972        if (kvm->timerfd >= 0)
 973                close(kvm->timerfd);
 974
 975        tcsetattr(0, TCSAFLUSH, &save);
 976        return err;
 977}
 978
 979static int kvm_live_open_events(struct perf_kvm_stat *kvm)
 980{
 981        int err, rc = -1;
 982        struct perf_evsel *pos;
 983        struct perf_evlist *evlist = kvm->evlist;
 984        char sbuf[STRERR_BUFSIZE];
 985
 986        perf_evlist__config(evlist, &kvm->opts, NULL);
 987
 988        /*
 989         * Note: exclude_{guest,host} do not apply here.
 990         *       This command processes KVM tracepoints from host only
 991         */
 992        evlist__for_each_entry(evlist, pos) {
 993                struct perf_event_attr *attr = &pos->attr;
 994
 995                /* make sure these *are* set */
 996                perf_evsel__set_sample_bit(pos, TID);
 997                perf_evsel__set_sample_bit(pos, TIME);
 998                perf_evsel__set_sample_bit(pos, CPU);
 999                perf_evsel__set_sample_bit(pos, RAW);
1000                /* make sure these are *not*; want as small a sample as possible */
1001                perf_evsel__reset_sample_bit(pos, PERIOD);
1002                perf_evsel__reset_sample_bit(pos, IP);
1003                perf_evsel__reset_sample_bit(pos, CALLCHAIN);
1004                perf_evsel__reset_sample_bit(pos, ADDR);
1005                perf_evsel__reset_sample_bit(pos, READ);
1006                attr->mmap = 0;
1007                attr->comm = 0;
1008                attr->task = 0;
1009
1010                attr->sample_period = 1;
1011
1012                attr->watermark = 0;
1013                attr->wakeup_events = 1000;
1014
1015                /* will enable all once we are ready */
1016                attr->disabled = 1;
1017        }
1018
1019        err = perf_evlist__open(evlist);
1020        if (err < 0) {
1021                printf("Couldn't create the events: %s\n",
1022                       str_error_r(errno, sbuf, sizeof(sbuf)));
1023                goto out;
1024        }
1025
1026        if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages, false) < 0) {
1027                ui__error("Failed to mmap the events: %s\n",
1028                          str_error_r(errno, sbuf, sizeof(sbuf)));
1029                perf_evlist__close(evlist);
1030                goto out;
1031        }
1032
1033        rc = 0;
1034
1035out:
1036        return rc;
1037}
1038#endif
1039
1040static int read_events(struct perf_kvm_stat *kvm)
1041{
1042        int ret;
1043
1044        struct perf_tool eops = {
1045                .sample                 = process_sample_event,
1046                .comm                   = perf_event__process_comm,
1047                .ordered_events         = true,
1048        };
1049        struct perf_data_file file = {
1050                .path = kvm->file_name,
1051                .mode = PERF_DATA_MODE_READ,
1052                .force = kvm->force,
1053        };
1054
1055        kvm->tool = eops;
1056        kvm->session = perf_session__new(&file, false, &kvm->tool);
1057        if (!kvm->session) {
1058                pr_err("Initializing perf session failed\n");
1059                return -1;
1060        }
1061
1062        symbol__init(&kvm->session->header.env);
1063
1064        if (!perf_session__has_traces(kvm->session, "kvm record")) {
1065                ret = -EINVAL;
1066                goto out_delete;
1067        }
1068
1069        /*
1070         * Do not use 'isa' recorded in kvm_exit tracepoint since it is not
1071         * traced in the old kernel.
1072         */
1073        ret = cpu_isa_config(kvm);
1074        if (ret < 0)
1075                goto out_delete;
1076
1077        ret = perf_session__process_events(kvm->session);
1078
1079out_delete:
1080        perf_session__delete(kvm->session);
1081        return ret;
1082}
1083
1084static int parse_target_str(struct perf_kvm_stat *kvm)
1085{
1086        if (kvm->opts.target.pid) {
1087                kvm->pid_list = intlist__new(kvm->opts.target.pid);
1088                if (kvm->pid_list == NULL) {
1089                        pr_err("Error parsing process id string\n");
1090                        return -EINVAL;
1091                }
1092        }
1093
1094        return 0;
1095}
1096
1097static int kvm_events_report_vcpu(struct perf_kvm_stat *kvm)
1098{
1099        int ret = -EINVAL;
1100        int vcpu = kvm->trace_vcpu;
1101
1102        if (parse_target_str(kvm) != 0)
1103                goto exit;
1104
1105        if (!verify_vcpu(vcpu))
1106                goto exit;
1107
1108        if (!select_key(kvm))
1109                goto exit;
1110
1111        if (!register_kvm_events_ops(kvm))
1112                goto exit;
1113
1114        init_kvm_event_record(kvm);
1115        setup_pager();
1116
1117        ret = read_events(kvm);
1118        if (ret)
1119                goto exit;
1120
1121        sort_result(kvm);
1122        print_result(kvm);
1123
1124exit:
1125        return ret;
1126}
1127
/*
 * Evaluate to a strdup() copy of @s.
 *
 * Note the hidden control flow: when strdup() fails, the *enclosing
 * function* returns -ENOMEM, so this may only be used inside functions
 * returning int.  Relies on the GNU statement-expression extension.
 */
#define STRDUP_FAIL_EXIT(s)			\
({						\
	char *_dup = strdup(s);			\
						\
	if (_dup == NULL)			\
		return -ENOMEM;			\
	_dup;					\
})
1135
1136int __weak setup_kvm_events_tp(struct perf_kvm_stat *kvm __maybe_unused)
1137{
1138        return 0;
1139}
1140
1141static int
1142kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
1143{
1144        unsigned int rec_argc, i, j, events_tp_size;
1145        const char **rec_argv;
1146        const char * const record_args[] = {
1147                "record",
1148                "-R",
1149                "-m", "1024",
1150                "-c", "1",
1151        };
1152        const char * const kvm_stat_record_usage[] = {
1153                "perf kvm stat record [<options>]",
1154                NULL
1155        };
1156        const char * const *events_tp;
1157        int ret;
1158
1159        events_tp_size = 0;
1160        ret = setup_kvm_events_tp(kvm);
1161        if (ret < 0) {
1162                pr_err("Unable to setup the kvm tracepoints\n");
1163                return ret;
1164        }
1165
1166        for (events_tp = kvm_events_tp; *events_tp; events_tp++)
1167                events_tp_size++;
1168
1169        rec_argc = ARRAY_SIZE(record_args) + argc + 2 +
1170                   2 * events_tp_size;
1171        rec_argv = calloc(rec_argc + 1, sizeof(char *));
1172
1173        if (rec_argv == NULL)
1174                return -ENOMEM;
1175
1176        for (i = 0; i < ARRAY_SIZE(record_args); i++)
1177                rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);
1178
1179        for (j = 0; j < events_tp_size; j++) {
1180                rec_argv[i++] = "-e";
1181                rec_argv[i++] = STRDUP_FAIL_EXIT(kvm_events_tp[j]);
1182        }
1183
1184        rec_argv[i++] = STRDUP_FAIL_EXIT("-o");
1185        rec_argv[i++] = STRDUP_FAIL_EXIT(kvm->file_name);
1186
1187        for (j = 1; j < (unsigned int)argc; j++, i++)
1188                rec_argv[i] = argv[j];
1189
1190        set_option_flag(record_options, 'e', "event", PARSE_OPT_HIDDEN);
1191        set_option_flag(record_options, 0, "filter", PARSE_OPT_HIDDEN);
1192        set_option_flag(record_options, 'R', "raw-samples", PARSE_OPT_HIDDEN);
1193
1194        set_option_flag(record_options, 'F', "freq", PARSE_OPT_DISABLED);
1195        set_option_flag(record_options, 0, "group", PARSE_OPT_DISABLED);
1196        set_option_flag(record_options, 'g', NULL, PARSE_OPT_DISABLED);
1197        set_option_flag(record_options, 0, "call-graph", PARSE_OPT_DISABLED);
1198        set_option_flag(record_options, 'd', "data", PARSE_OPT_DISABLED);
1199        set_option_flag(record_options, 'T', "timestamp", PARSE_OPT_DISABLED);
1200        set_option_flag(record_options, 'P', "period", PARSE_OPT_DISABLED);
1201        set_option_flag(record_options, 'n', "no-samples", PARSE_OPT_DISABLED);
1202        set_option_flag(record_options, 'N', "no-buildid-cache", PARSE_OPT_DISABLED);
1203        set_option_flag(record_options, 'B', "no-buildid", PARSE_OPT_DISABLED);
1204        set_option_flag(record_options, 'G', "cgroup", PARSE_OPT_DISABLED);
1205        set_option_flag(record_options, 'b', "branch-any", PARSE_OPT_DISABLED);
1206        set_option_flag(record_options, 'j', "branch-filter", PARSE_OPT_DISABLED);
1207        set_option_flag(record_options, 'W', "weight", PARSE_OPT_DISABLED);
1208        set_option_flag(record_options, 0, "transaction", PARSE_OPT_DISABLED);
1209
1210        record_usage = kvm_stat_record_usage;
1211        return cmd_record(i, rec_argv, NULL);
1212}
1213
1214static int
1215kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv)
1216{
1217        const struct option kvm_events_report_options[] = {
1218                OPT_STRING(0, "event", &kvm->report_event, "report event",
1219                           "event for reporting: vmexit, "
1220                           "mmio (x86 only), ioport (x86 only)"),
1221                OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
1222                            "vcpu id to report"),
1223                OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
1224                            "key for sorting: sample(sort by samples number)"
1225                            " time (sort by avg time)"),
1226                OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
1227                           "analyze events only for given process id(s)"),
1228                OPT_BOOLEAN('f', "force", &kvm->force, "don't complain, do it"),
1229                OPT_END()
1230        };
1231
1232        const char * const kvm_events_report_usage[] = {
1233                "perf kvm stat report [<options>]",
1234                NULL
1235        };
1236
1237        if (argc) {
1238                argc = parse_options(argc, argv,
1239                                     kvm_events_report_options,
1240                                     kvm_events_report_usage, 0);
1241                if (argc)
1242                        usage_with_options(kvm_events_report_usage,
1243                                           kvm_events_report_options);
1244        }
1245
1246        if (!kvm->opts.target.pid)
1247                kvm->opts.target.system_wide = true;
1248
1249        return kvm_events_report_vcpu(kvm);
1250}
1251
1252#ifdef HAVE_TIMERFD_SUPPORT
1253static struct perf_evlist *kvm_live_event_list(void)
1254{
1255        struct perf_evlist *evlist;
1256        char *tp, *name, *sys;
1257        int err = -1;
1258        const char * const *events_tp;
1259
1260        evlist = perf_evlist__new();
1261        if (evlist == NULL)
1262                return NULL;
1263
1264        for (events_tp = kvm_events_tp; *events_tp; events_tp++) {
1265
1266                tp = strdup(*events_tp);
1267                if (tp == NULL)
1268                        goto out;
1269
1270                /* split tracepoint into subsystem and name */
1271                sys = tp;
1272                name = strchr(tp, ':');
1273                if (name == NULL) {
1274                        pr_err("Error parsing %s tracepoint: subsystem delimiter not found\n",
1275                               *events_tp);
1276                        free(tp);
1277                        goto out;
1278                }
1279                *name = '\0';
1280                name++;
1281
1282                if (perf_evlist__add_newtp(evlist, sys, name, NULL)) {
1283                        pr_err("Failed to add %s tracepoint to the list\n", *events_tp);
1284                        free(tp);
1285                        goto out;
1286                }
1287
1288                free(tp);
1289        }
1290
1291        err = 0;
1292
1293out:
1294        if (err) {
1295                perf_evlist__delete(evlist);
1296                evlist = NULL;
1297        }
1298
1299        return evlist;
1300}
1301
1302static int kvm_events_live(struct perf_kvm_stat *kvm,
1303                           int argc, const char **argv)
1304{
1305        char errbuf[BUFSIZ];
1306        int err;
1307
1308        const struct option live_options[] = {
1309                OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
1310                        "record events on existing process id"),
1311                OPT_CALLBACK('m', "mmap-pages", &kvm->opts.mmap_pages, "pages",
1312                        "number of mmap data pages",
1313                        perf_evlist__parse_mmap_pages),
1314                OPT_INCR('v', "verbose", &verbose,
1315                        "be more verbose (show counter open errors, etc)"),
1316                OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide,
1317                        "system-wide collection from all CPUs"),
1318                OPT_UINTEGER('d', "display", &kvm->display_time,
1319                        "time in seconds between display updates"),
1320                OPT_STRING(0, "event", &kvm->report_event, "report event",
1321                        "event for reporting: "
1322                        "vmexit, mmio (x86 only), ioport (x86 only)"),
1323                OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
1324                        "vcpu id to report"),
1325                OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
1326                        "key for sorting: sample(sort by samples number)"
1327                        " time (sort by avg time)"),
1328                OPT_U64(0, "duration", &kvm->duration,
1329                        "show events other than"
1330                        " HLT (x86 only) or Wait state (s390 only)"
1331                        " that take longer than duration usecs"),
1332                OPT_UINTEGER(0, "proc-map-timeout", &kvm->opts.proc_map_timeout,
1333                                "per thread proc mmap processing timeout in ms"),
1334                OPT_END()
1335        };
1336        const char * const live_usage[] = {
1337                "perf kvm stat live [<options>]",
1338                NULL
1339        };
1340        struct perf_data_file file = {
1341                .mode = PERF_DATA_MODE_WRITE,
1342        };
1343
1344
1345        /* event handling */
1346        kvm->tool.sample = process_sample_event;
1347        kvm->tool.comm   = perf_event__process_comm;
1348        kvm->tool.exit   = perf_event__process_exit;
1349        kvm->tool.fork   = perf_event__process_fork;
1350        kvm->tool.lost   = process_lost_event;
1351        kvm->tool.ordered_events = true;
1352        perf_tool__fill_defaults(&kvm->tool);
1353
1354        /* set defaults */
1355        kvm->display_time = 1;
1356        kvm->opts.user_interval = 1;
1357        kvm->opts.mmap_pages = 512;
1358        kvm->opts.target.uses_mmap = false;
1359        kvm->opts.target.uid_str = NULL;
1360        kvm->opts.target.uid = UINT_MAX;
1361        kvm->opts.proc_map_timeout = 500;
1362
1363        symbol__init(NULL);
1364        disable_buildid_cache();
1365
1366        use_browser = 0;
1367
1368        if (argc) {
1369                argc = parse_options(argc, argv, live_options,
1370                                     live_usage, 0);
1371                if (argc)
1372                        usage_with_options(live_usage, live_options);
1373        }
1374
1375        kvm->duration *= NSEC_PER_USEC;   /* convert usec to nsec */
1376
1377        /*
1378         * target related setups
1379         */
1380        err = target__validate(&kvm->opts.target);
1381        if (err) {
1382                target__strerror(&kvm->opts.target, err, errbuf, BUFSIZ);
1383                ui__warning("%s", errbuf);
1384        }
1385
1386        if (target__none(&kvm->opts.target))
1387                kvm->opts.target.system_wide = true;
1388
1389
1390        /*
1391         * generate the event list
1392         */
1393        err = setup_kvm_events_tp(kvm);
1394        if (err < 0) {
1395                pr_err("Unable to setup the kvm tracepoints\n");
1396                return err;
1397        }
1398
1399        kvm->evlist = kvm_live_event_list();
1400        if (kvm->evlist == NULL) {
1401                err = -1;
1402                goto out;
1403        }
1404
1405        symbol_conf.nr_events = kvm->evlist->nr_entries;
1406
1407        if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0)
1408                usage_with_options(live_usage, live_options);
1409
1410        /*
1411         * perf session
1412         */
1413        kvm->session = perf_session__new(&file, false, &kvm->tool);
1414        if (kvm->session == NULL) {
1415                err = -1;
1416                goto out;
1417        }
1418        kvm->session->evlist = kvm->evlist;
1419        perf_session__set_id_hdr_size(kvm->session);
1420        ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
1421        machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
1422                                    kvm->evlist->threads, false, kvm->opts.proc_map_timeout);
1423        err = kvm_live_open_events(kvm);
1424        if (err)
1425                goto out;
1426
1427        err = kvm_events_live_report(kvm);
1428
1429out:
1430        perf_session__delete(kvm->session);
1431        kvm->session = NULL;
1432        perf_evlist__delete(kvm->evlist);
1433
1434        return err;
1435}
1436#endif
1437
/*
 * Print the 'perf kvm stat' sub-command overview to stdout.  The closing
 * line introduces output that the caller produces afterwards.
 */
static void print_kvm_stat_usage(void)
{
	/* adjacent literals are concatenated; the output is one contiguous text */
	printf("Usage: perf kvm stat <command>\n\n"
	       "# Available commands:\n"
	       "\trecord: record kvm events\n"
	       "\treport: report statistical data of kvm events\n"
	       "\tlive:   live reporting of statistical data of kvm events\n"
	       "\nOtherwise, it is the alias of 'perf stat':\n");
}
1449
1450static int kvm_cmd_stat(const char *file_name, int argc, const char **argv)
1451{
1452        struct perf_kvm_stat kvm = {
1453                .file_name = file_name,
1454
1455                .trace_vcpu     = -1,
1456                .report_event   = "vmexit",
1457                .sort_key       = "sample",
1458
1459        };
1460
1461        if (argc == 1) {
1462                print_kvm_stat_usage();
1463                goto perf_stat;
1464        }
1465
1466        if (!strncmp(argv[1], "rec", 3))
1467                return kvm_events_record(&kvm, argc - 1, argv + 1);
1468
1469        if (!strncmp(argv[1], "rep", 3))
1470                return kvm_events_report(&kvm, argc - 1 , argv + 1);
1471
1472#ifdef HAVE_TIMERFD_SUPPORT
1473        if (!strncmp(argv[1], "live", 4))
1474                return kvm_events_live(&kvm, argc - 1 , argv + 1);
1475#endif
1476
1477perf_stat:
1478        return cmd_stat(argc, argv, NULL);
1479}
1480#endif /* HAVE_KVM_STAT_SUPPORT */
1481
/*
 * Run 'perf record -o <file_name> <args...>' on behalf of 'perf kvm record'.
 *
 * argv[0] (the sub-command word) is replaced by the three leading entries;
 * the remaining caller arguments are appended unchanged.
 *
 * Returns -ENOMEM if the argument vector cannot be allocated, otherwise the
 * result of cmd_record().
 */
static int __cmd_record(const char *file_name, int argc, const char **argv)
{
	int rec_argc, i = 0, j;
	const char **rec_argv;

	rec_argc = argc + 2;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	rec_argv[i++] = strdup("record");
	rec_argv[i++] = strdup("-o");
	rec_argv[i++] = strdup(file_name);

	/* a NULL slot would silently cut the argument list short */
	if (!rec_argv[0] || !rec_argv[1] || !rec_argv[2]) {
		for (j = 0; j < i; j++)
			free((char *)rec_argv[j]);
		free(rec_argv);
		return -ENOMEM;
	}

	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_record(i, rec_argv, NULL);
}
1499
/*
 * Run 'perf report -i <file_name> <args...>' on behalf of 'perf kvm report'.
 *
 * argv[0] (the sub-command word) is replaced by the three leading entries;
 * the remaining caller arguments are appended unchanged.
 *
 * Returns -ENOMEM if the argument vector cannot be allocated, otherwise the
 * result of cmd_report().
 */
static int __cmd_report(const char *file_name, int argc, const char **argv)
{
	int rec_argc, i = 0, j;
	const char **rec_argv;

	rec_argc = argc + 2;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	rec_argv[i++] = strdup("report");
	rec_argv[i++] = strdup("-i");
	rec_argv[i++] = strdup(file_name);

	/* a NULL slot would silently cut the argument list short */
	if (!rec_argv[0] || !rec_argv[1] || !rec_argv[2]) {
		for (j = 0; j < i; j++)
			free((char *)rec_argv[j]);
		free(rec_argv);
		return -ENOMEM;
	}

	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_report(i, rec_argv, NULL);
}
1517
/*
 * Run 'perf buildid-list -i <file_name> <args...>' on behalf of
 * 'perf kvm buildid-list'.
 *
 * argv[0] (the sub-command word) is replaced by the three leading entries;
 * the remaining caller arguments are appended unchanged.
 *
 * Returns -ENOMEM if the argument vector cannot be allocated, otherwise the
 * result of cmd_buildid_list().
 */
static int
__cmd_buildid_list(const char *file_name, int argc, const char **argv)
{
	int rec_argc, i = 0, j;
	const char **rec_argv;

	rec_argc = argc + 2;
	rec_argv = calloc(rec_argc + 1, sizeof(char *));
	if (rec_argv == NULL)
		return -ENOMEM;

	rec_argv[i++] = strdup("buildid-list");
	rec_argv[i++] = strdup("-i");
	rec_argv[i++] = strdup(file_name);

	/* a NULL slot would silently cut the argument list short */
	if (!rec_argv[0] || !rec_argv[1] || !rec_argv[2]) {
		for (j = 0; j < i; j++)
			free((char *)rec_argv[j]);
		free(rec_argv);
		return -ENOMEM;
	}

	for (j = 1; j < argc; j++, i++)
		rec_argv[i] = argv[j];

	BUG_ON(i != rec_argc);

	return cmd_buildid_list(i, rec_argv, NULL);
}
1536
1537int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused)
1538{
1539        const char *file_name = NULL;
1540        const struct option kvm_options[] = {
1541                OPT_STRING('i', "input", &file_name, "file",
1542                           "Input file name"),
1543                OPT_STRING('o', "output", &file_name, "file",
1544                           "Output file name"),
1545                OPT_BOOLEAN(0, "guest", &perf_guest,
1546                            "Collect guest os data"),
1547                OPT_BOOLEAN(0, "host", &perf_host,
1548                            "Collect host os data"),
1549                OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
1550                           "guest mount directory under which every guest os"
1551                           " instance has a subdir"),
1552                OPT_STRING(0, "guestvmlinux", &symbol_conf.default_guest_vmlinux_name,
1553                           "file", "file saving guest os vmlinux"),
1554                OPT_STRING(0, "guestkallsyms", &symbol_conf.default_guest_kallsyms,
1555                           "file", "file saving guest os /proc/kallsyms"),
1556                OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
1557                           "file", "file saving guest os /proc/modules"),
1558                OPT_INCR('v', "verbose", &verbose,
1559                            "be more verbose (show counter open errors, etc)"),
1560                OPT_END()
1561        };
1562
1563        const char *const kvm_subcommands[] = { "top", "record", "report", "diff",
1564                                                "buildid-list", "stat", NULL };
1565        const char *kvm_usage[] = { NULL, NULL };
1566
1567        perf_host  = 0;
1568        perf_guest = 1;
1569
1570        argc = parse_options_subcommand(argc, argv, kvm_options, kvm_subcommands, kvm_usage,
1571                                        PARSE_OPT_STOP_AT_NON_OPTION);
1572        if (!argc)
1573                usage_with_options(kvm_usage, kvm_options);
1574
1575        if (!perf_host)
1576                perf_guest = 1;
1577
1578        if (!file_name) {
1579                file_name = get_filename_for_perf_kvm();
1580
1581                if (!file_name) {
1582                        pr_err("Failed to allocate memory for filename\n");
1583                        return -ENOMEM;
1584                }
1585        }
1586
1587        if (!strncmp(argv[0], "rec", 3))
1588                return __cmd_record(file_name, argc, argv);
1589        else if (!strncmp(argv[0], "rep", 3))
1590                return __cmd_report(file_name, argc, argv);
1591        else if (!strncmp(argv[0], "diff", 4))
1592                return cmd_diff(argc, argv, NULL);
1593        else if (!strncmp(argv[0], "top", 3))
1594                return cmd_top(argc, argv, NULL);
1595        else if (!strncmp(argv[0], "buildid-list", 12))
1596                return __cmd_buildid_list(file_name, argc, argv);
1597#ifdef HAVE_KVM_STAT_SUPPORT
1598        else if (!strncmp(argv[0], "stat", 4))
1599                return kvm_cmd_stat(file_name, argc, argv);
1600#endif
1601        else
1602                usage_with_options(kvm_usage, kvm_options);
1603
1604        return 0;
1605}
1606