linux/tools/perf/builtin-kmem.c
// SPDX-License-Identifier: GPL-2.0
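/*
 * builtin-kmem.c: analyze kernel memory allocator (slab and page)
 * tracepoints recorded with 'perf kmem record'.
 *
 * A typical session (options are a sketch; see
 * tools/perf/Documentation/perf-kmem.txt for the authoritative list):
 *
 *   perf kmem record --slab -- sleep 1
 *   perf kmem stat --caller --alloc
 */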
#include "builtin.h"
#include "perf.h"

#include "util/evlist.h"
#include "util/evsel.h"
#include "util/util.h"
#include "util/config.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/callchain.h"
#include "util/time-utils.h"

#include <subcmd/parse-options.h>
#include "util/trace-event.h"
#include "util/data.h"
#include "util/cpumap.h"

#include "util/debug.h"

#include <linux/kernel.h>
#include <linux/rbtree.h>
#include <linux/string.h>
#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <regex.h>

#include "sane_ctype.h"

static int      kmem_slab;
static int      kmem_page;

static long     kmem_page_size;
static enum {
        KMEM_SLAB,
        KMEM_PAGE,
} kmem_default = KMEM_SLAB;  /* for backward compatibility */

struct alloc_stat;
typedef int (*sort_fn_t)(void *, void *);

static int                      alloc_flag;
static int                      caller_flag;

static int                      alloc_lines = -1;
static int                      caller_lines = -1;

static bool                     raw_ip;

struct alloc_stat {
        u64     call_site;
        u64     ptr;
        u64     bytes_req;
        u64     bytes_alloc;
        u64     last_alloc;
        u32     hit;
        u32     pingpong;

        short   alloc_cpu;

        struct rb_node node;
};

static struct rb_root root_alloc_stat;
static struct rb_root root_alloc_sorted;
static struct rb_root root_caller_stat;
static struct rb_root root_caller_sorted;

static unsigned long total_requested, total_allocated, total_freed;
static unsigned long nr_allocs, nr_cross_allocs;

/* filters controlling the start and stop time of the analysis */
static struct perf_time_interval ptime;
static const char *time_str;

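/*
 * Slab allocations are accounted twice: per pointer in root_alloc_stat,
 * so that a later free event can find the allocation again, and per
 * call site in root_caller_stat.
 */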
static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
                             int bytes_req, int bytes_alloc, int cpu)
{
        struct rb_node **node = &root_alloc_stat.rb_node;
        struct rb_node *parent = NULL;
        struct alloc_stat *data = NULL;

        while (*node) {
                parent = *node;
                data = rb_entry(*node, struct alloc_stat, node);

                if (ptr > data->ptr)
                        node = &(*node)->rb_right;
                else if (ptr < data->ptr)
                        node = &(*node)->rb_left;
                else
                        break;
        }

        if (data && data->ptr == ptr) {
                data->hit++;
                data->bytes_req += bytes_req;
                data->bytes_alloc += bytes_alloc;
        } else {
                data = malloc(sizeof(*data));
                if (!data) {
                        pr_err("%s: malloc failed\n", __func__);
                        return -1;
                }
                data->ptr = ptr;
                data->pingpong = 0;
                data->hit = 1;
                data->bytes_req = bytes_req;
                data->bytes_alloc = bytes_alloc;

                rb_link_node(&data->node, parent, node);
                rb_insert_color(&data->node, &root_alloc_stat);
        }
        data->call_site = call_site;
        data->alloc_cpu = cpu;
        data->last_alloc = bytes_alloc;

        return 0;
}

static int insert_caller_stat(unsigned long call_site,
                              int bytes_req, int bytes_alloc)
{
        struct rb_node **node = &root_caller_stat.rb_node;
        struct rb_node *parent = NULL;
        struct alloc_stat *data = NULL;

        while (*node) {
                parent = *node;
                data = rb_entry(*node, struct alloc_stat, node);

                if (call_site > data->call_site)
                        node = &(*node)->rb_right;
                else if (call_site < data->call_site)
                        node = &(*node)->rb_left;
                else
                        break;
        }

        if (data && data->call_site == call_site) {
                data->hit++;
                data->bytes_req += bytes_req;
                data->bytes_alloc += bytes_alloc;
        } else {
                data = malloc(sizeof(*data));
                if (!data) {
                        pr_err("%s: malloc failed\n", __func__);
                        return -1;
                }
                data->call_site = call_site;
                data->pingpong = 0;
                data->hit = 1;
                data->bytes_req = bytes_req;
                data->bytes_alloc = bytes_alloc;

                rb_link_node(&data->node, parent, node);
                rb_insert_color(&data->node, &root_caller_stat);
        }

        return 0;
}

static int perf_evsel__process_alloc_event(struct perf_evsel *evsel,
                                           struct perf_sample *sample)
{
        unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"),
                      call_site = perf_evsel__intval(evsel, sample, "call_site");
        int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"),
            bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");

        if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
            insert_caller_stat(call_site, bytes_req, bytes_alloc))
                return -1;

        total_requested += bytes_req;
        total_allocated += bytes_alloc;

        nr_allocs++;
        return 0;
}

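/*
 * The *_node tracepoint variants also carry the target NUMA node; an
 * allocation is counted as "cross" when that node differs from the node
 * of the CPU the sample was taken on.
 */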
static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
                                                struct perf_sample *sample)
{
        int ret = perf_evsel__process_alloc_event(evsel, sample);

        if (!ret) {
                int node1 = cpu__get_node(sample->cpu),
                    node2 = perf_evsel__intval(evsel, sample, "node");

                if (node1 != node2)
                        nr_cross_allocs++;
        }

        return ret;
}

static int ptr_cmp(void *, void *);
static int slab_callsite_cmp(void *, void *);

static struct alloc_stat *search_alloc_stat(unsigned long ptr,
                                            unsigned long call_site,
                                            struct rb_root *root,
                                            sort_fn_t sort_fn)
{
        struct rb_node *node = root->rb_node;
        struct alloc_stat key = { .ptr = ptr, .call_site = call_site };

        while (node) {
                struct alloc_stat *data;
                int cmp;

                data = rb_entry(node, struct alloc_stat, node);

                cmp = sort_fn(&key, data);
                if (cmp < 0)
                        node = node->rb_left;
                else if (cmp > 0)
                        node = node->rb_right;
                else
                        return data;
        }
        return NULL;
}

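/*
 * A "ping-pong" is an object freed on a different CPU than the one that
 * allocated it; it is counted against both the allocation and its call
 * site.
 */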
static int perf_evsel__process_free_event(struct perf_evsel *evsel,
                                          struct perf_sample *sample)
{
        unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
        struct alloc_stat *s_alloc, *s_caller;

        s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
        if (!s_alloc)
                return 0;

        total_freed += s_alloc->last_alloc;

        if ((short)sample->cpu != s_alloc->alloc_cpu) {
                s_alloc->pingpong++;

                s_caller = search_alloc_stat(0, s_alloc->call_site,
                                             &root_caller_stat,
                                             slab_callsite_cmp);
                if (!s_caller)
                        return -1;
                s_caller->pingpong++;
        }
        s_alloc->alloc_cpu = -1;

        return 0;
}

static u64 total_page_alloc_bytes;
static u64 total_page_free_bytes;
static u64 total_page_nomatch_bytes;
static u64 total_page_fail_bytes;
static unsigned long nr_page_allocs;
static unsigned long nr_page_frees;
static unsigned long nr_page_fails;
static unsigned long nr_page_nomatch;

static bool use_pfn;
static bool live_page;
static struct perf_session *kmem_session;

#define MAX_MIGRATE_TYPES  6
#define MAX_PAGE_ORDER     11

static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];

struct page_stat {
        struct rb_node  node;
        u64             page;
        u64             callsite;
        int             order;
        unsigned        gfp_flags;
        unsigned        migrate_type;
        u64             alloc_bytes;
        u64             free_bytes;
        int             nr_alloc;
        int             nr_free;
};

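/*
 * page_live_tree holds the currently allocated pages, keyed by page/pfn
 * so that free events can find them.  page_alloc_tree and
 * page_caller_tree accumulate the final statistics, keyed by the sort
 * dimensions on the _input lists declared below.
 */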
static struct rb_root page_live_tree;
static struct rb_root page_alloc_tree;
static struct rb_root page_alloc_sorted;
static struct rb_root page_caller_tree;
static struct rb_root page_caller_sorted;

struct alloc_func {
        u64 start;
        u64 end;
        char *name;
};

static int nr_alloc_funcs;
static struct alloc_func *alloc_func_list;

static int funcmp(const void *a, const void *b)
{
        const struct alloc_func *fa = a;
        const struct alloc_func *fb = b;

        if (fa->start > fb->start)
                return 1;
        else
                return -1;
}

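/*
 * bsearch() comparator: the key has start == end == the callchain ip,
 * so an entry matches when that ip falls within the [start, end) range
 * of an allocation function.
 */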
static int callcmp(const void *a, const void *b)
{
        const struct alloc_func *fa = a;
        const struct alloc_func *fb = b;

        if (fb->start <= fa->start && fa->end < fb->end)
                return 0;

        if (fa->start > fb->start)
                return 1;
        else
                return -1;
}

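/*
 * Collect all kernel symbols that look like page-allocator entry points
 * (e.g. __alloc_pages*, __get_free_pages, get_zeroed_page) so that
 * find_callsite() below can skip them when walking a callchain.
 */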
static int build_alloc_func_list(void)
{
        int ret;
        struct map *kernel_map;
        struct symbol *sym;
        struct rb_node *node;
        struct alloc_func *func;
        struct machine *machine = &kmem_session->machines.host;
        regex_t alloc_func_regex;
        const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?";

        ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED);
        if (ret) {
                char err[BUFSIZ];

                regerror(ret, &alloc_func_regex, err, sizeof(err));
                pr_err("Invalid regex: %s\n%s", pattern, err);
                return -EINVAL;
        }

        kernel_map = machine__kernel_map(machine);
        if (map__load(kernel_map) < 0) {
                pr_err("cannot load kernel map\n");
                return -ENOENT;
        }

        map__for_each_symbol(kernel_map, sym, node) {
                if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0))
                        continue;

                func = realloc(alloc_func_list,
                               (nr_alloc_funcs + 1) * sizeof(*func));
                if (func == NULL)
                        return -ENOMEM;

                pr_debug("alloc func: %s\n", sym->name);
                func[nr_alloc_funcs].start = sym->start;
                func[nr_alloc_funcs].end   = sym->end;
                func[nr_alloc_funcs].name  = sym->name;

                alloc_func_list = func;
                nr_alloc_funcs++;
        }

        qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp);

        regfree(&alloc_func_regex);
        return 0;
}

/*
 * Find the first function in the callchain that is not itself a memory
 * allocation function; the allocator functions to skip are listed in
 * 'alloc_func_list'.
 */
static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample)
{
        struct addr_location al;
        struct machine *machine = &kmem_session->machines.host;
        struct callchain_cursor_node *node;

        if (alloc_func_list == NULL) {
                if (build_alloc_func_list() < 0)
                        goto out;
        }

        al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
        sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);

        callchain_cursor_commit(&callchain_cursor);
        while (true) {
                struct alloc_func key, *caller;
                u64 addr;

                node = callchain_cursor_current(&callchain_cursor);
                if (node == NULL)
                        break;

                key.start = key.end = node->ip;
                caller = bsearch(&key, alloc_func_list, nr_alloc_funcs,
                                 sizeof(key), callcmp);
                if (!caller) {
                        /* found */
                        if (node->map)
                                addr = map__unmap_ip(node->map, node->ip);
                        else
                                addr = node->ip;

                        return addr;
                } else
                        pr_debug3("skipping alloc function: %s\n", caller->name);

                callchain_cursor_advance(&callchain_cursor);
        }

out:
        pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip);
        return sample->ip;
}

struct sort_dimension {
        const char              name[20];
        sort_fn_t               cmp;
        struct list_head        list;
};

static LIST_HEAD(page_alloc_sort_input);
static LIST_HEAD(page_caller_sort_input);

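/*
 * The __page_stat__findnew_*() helpers share one pattern: walk an
 * rbtree looking for a matching entry and, if 'create' is true,
 * allocate, initialize and link a new entry on a miss.  The
 * find/findnew wrappers merely fix the 'create' argument.
 */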
static struct page_stat *
__page_stat__findnew_page(struct page_stat *pstat, bool create)
{
        struct rb_node **node = &page_live_tree.rb_node;
        struct rb_node *parent = NULL;
        struct page_stat *data;

        while (*node) {
                s64 cmp;

                parent = *node;
                data = rb_entry(*node, struct page_stat, node);

                cmp = data->page - pstat->page;
                if (cmp < 0)
                        node = &parent->rb_left;
                else if (cmp > 0)
                        node = &parent->rb_right;
                else
                        return data;
        }

        if (!create)
                return NULL;

        data = zalloc(sizeof(*data));
        if (data != NULL) {
                data->page = pstat->page;
                data->order = pstat->order;
                data->gfp_flags = pstat->gfp_flags;
                data->migrate_type = pstat->migrate_type;

                rb_link_node(&data->node, parent, node);
                rb_insert_color(&data->node, &page_live_tree);
        }

        return data;
}

static struct page_stat *page_stat__find_page(struct page_stat *pstat)
{
        return __page_stat__findnew_page(pstat, false);
}

static struct page_stat *page_stat__findnew_page(struct page_stat *pstat)
{
        return __page_stat__findnew_page(pstat, true);
}

static struct page_stat *
__page_stat__findnew_alloc(struct page_stat *pstat, bool create)
{
        struct rb_node **node = &page_alloc_tree.rb_node;
        struct rb_node *parent = NULL;
        struct page_stat *data;
        struct sort_dimension *sort;

        while (*node) {
                int cmp = 0;

                parent = *node;
                data = rb_entry(*node, struct page_stat, node);

                list_for_each_entry(sort, &page_alloc_sort_input, list) {
                        cmp = sort->cmp(pstat, data);
                        if (cmp)
                                break;
                }

                if (cmp < 0)
                        node = &parent->rb_left;
                else if (cmp > 0)
                        node = &parent->rb_right;
                else
                        return data;
        }

        if (!create)
                return NULL;

        data = zalloc(sizeof(*data));
        if (data != NULL) {
                data->page = pstat->page;
                data->order = pstat->order;
                data->gfp_flags = pstat->gfp_flags;
                data->migrate_type = pstat->migrate_type;

                rb_link_node(&data->node, parent, node);
                rb_insert_color(&data->node, &page_alloc_tree);
        }

        return data;
}

static struct page_stat *page_stat__find_alloc(struct page_stat *pstat)
{
        return __page_stat__findnew_alloc(pstat, false);
}

static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat)
{
        return __page_stat__findnew_alloc(pstat, true);
}

static struct page_stat *
__page_stat__findnew_caller(struct page_stat *pstat, bool create)
{
        struct rb_node **node = &page_caller_tree.rb_node;
        struct rb_node *parent = NULL;
        struct page_stat *data;
        struct sort_dimension *sort;

        while (*node) {
                int cmp = 0;

                parent = *node;
                data = rb_entry(*node, struct page_stat, node);

                list_for_each_entry(sort, &page_caller_sort_input, list) {
                        cmp = sort->cmp(pstat, data);
                        if (cmp)
                                break;
                }

                if (cmp < 0)
                        node = &parent->rb_left;
                else if (cmp > 0)
                        node = &parent->rb_right;
                else
                        return data;
        }

        if (!create)
                return NULL;

        data = zalloc(sizeof(*data));
        if (data != NULL) {
                data->callsite = pstat->callsite;
                data->order = pstat->order;
                data->gfp_flags = pstat->gfp_flags;
                data->migrate_type = pstat->migrate_type;

                rb_link_node(&data->node, parent, node);
                rb_insert_color(&data->node, &page_caller_tree);
        }

        return data;
}

static struct page_stat *page_stat__find_caller(struct page_stat *pstat)
{
        return __page_stat__findnew_caller(pstat, false);
}

static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat)
{
        return __page_stat__findnew_caller(pstat, true);
}

static bool valid_page(u64 pfn_or_page)
{
        if (use_pfn && pfn_or_page == -1UL)
                return false;
        if (!use_pfn && pfn_or_page == 0)
                return false;
        return true;
}

struct gfp_flag {
        unsigned int flags;
        char *compact_str;
        char *human_readable;
};

static struct gfp_flag *gfps;
static int nr_gfps;

static int gfpcmp(const void *a, const void *b)
{
        const struct gfp_flag *fa = a;
        const struct gfp_flag *fb = b;

        return fa->flags - fb->flags;
}

/* see include/trace/events/mmflags.h */
static const struct {
        const char *original;
        const char *compact;
} gfp_compact_table[] = {
        { "GFP_TRANSHUGE",              "THP" },
        { "GFP_TRANSHUGE_LIGHT",        "THL" },
        { "GFP_HIGHUSER_MOVABLE",       "HUM" },
        { "GFP_HIGHUSER",               "HU" },
        { "GFP_USER",                   "U" },
        { "GFP_KERNEL_ACCOUNT",         "KAC" },
        { "GFP_KERNEL",                 "K" },
        { "GFP_NOFS",                   "NF" },
        { "GFP_ATOMIC",                 "A" },
        { "GFP_NOIO",                   "NI" },
        { "GFP_NOWAIT",                 "NW" },
        { "GFP_DMA",                    "D" },
        { "__GFP_HIGHMEM",              "HM" },
        { "GFP_DMA32",                  "D32" },
        { "__GFP_HIGH",                 "H" },
        { "__GFP_ATOMIC",               "_A" },
        { "__GFP_IO",                   "I" },
        { "__GFP_FS",                   "F" },
        { "__GFP_COLD",                 "CO" },
        { "__GFP_NOWARN",               "NWR" },
        { "__GFP_RETRY_MAYFAIL",        "R" },
        { "__GFP_NOFAIL",               "NF" },
        { "__GFP_NORETRY",              "NR" },
        { "__GFP_COMP",                 "C" },
        { "__GFP_ZERO",                 "Z" },
        { "__GFP_NOMEMALLOC",           "NMA" },
        { "__GFP_MEMALLOC",             "MA" },
        { "__GFP_HARDWALL",             "HW" },
        { "__GFP_THISNODE",             "TN" },
        { "__GFP_RECLAIMABLE",          "RC" },
        { "__GFP_MOVABLE",              "M" },
        { "__GFP_ACCOUNT",              "AC" },
        { "__GFP_NOTRACK",              "NT" },
        { "__GFP_WRITE",                "WR" },
        { "__GFP_RECLAIM",              "R" },
        { "__GFP_DIRECT_RECLAIM",       "DR" },
        { "__GFP_KSWAPD_RECLAIM",       "KR" },
};

static size_t max_gfp_len;

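/*
 * Rewrite a pretty-printed flag string such as "GFP_KERNEL|__GFP_ZERO"
 * into its compact form "K|Z" using gfp_compact_table above, and track
 * the longest compact string seen for later column formatting.
 */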
static char *compact_gfp_flags(char *gfp_flags)
{
        char *orig_flags = strdup(gfp_flags);
        char *new_flags = NULL;
        char *str, *pos = NULL;
        size_t len = 0;

        if (orig_flags == NULL)
                return NULL;

        str = strtok_r(orig_flags, "|", &pos);
        while (str) {
                size_t i;
                char *new;
                const char *cpt;

                for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) {
                        if (strcmp(gfp_compact_table[i].original, str))
                                continue;

                        cpt = gfp_compact_table[i].compact;
                        new = realloc(new_flags, len + strlen(cpt) + 2);
                        if (new == NULL) {
                                free(new_flags);
                                /* also release the strdup()ed copy */
                                free(orig_flags);
                                return NULL;
                        }

                        new_flags = new;

                        if (!len) {
                                strcpy(new_flags, cpt);
                        } else {
                                strcat(new_flags, "|");
                                strcat(new_flags, cpt);
                                len++;
                        }

                        len += strlen(cpt);
                }

                str = strtok_r(NULL, "|", &pos);
        }

        if (max_gfp_len < len)
                max_gfp_len = len;

        free(orig_flags);
        return new_flags;
}

static char *compact_gfp_string(unsigned long gfp_flags)
{
        struct gfp_flag key = {
                .flags = gfp_flags,
        };
        struct gfp_flag *gfp;

        gfp = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp);
        if (gfp)
                return gfp->compact_str;

        return NULL;
}

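/*
 * Build the gfps table lazily: the first time a gfp_flags value is
 * seen, pretty-print the raw tracepoint record and pull the
 * "gfp_flags=..." token out of it; values already in the table return
 * early through the bsearch() check.
 */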
static int parse_gfp_flags(struct perf_evsel *evsel, struct perf_sample *sample,
                           unsigned int gfp_flags)
{
        struct pevent_record record = {
                .cpu = sample->cpu,
                .data = sample->raw_data,
                .size = sample->raw_size,
        };
        struct trace_seq seq;
        char *str, *pos = NULL;

        if (nr_gfps) {
                struct gfp_flag key = {
                        .flags = gfp_flags,
                };

                if (bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp))
                        return 0;
        }

        trace_seq_init(&seq);
        pevent_event_info(&seq, evsel->tp_format, &record);

        str = strtok_r(seq.buffer, " ", &pos);
        while (str) {
                if (!strncmp(str, "gfp_flags=", 10)) {
                        struct gfp_flag *new;

                        new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps));
                        if (new == NULL)
                                return -ENOMEM;

                        gfps = new;
                        new += nr_gfps++;

                        new->flags = gfp_flags;
                        new->human_readable = strdup(str + 10);
                        new->compact_str = compact_gfp_flags(str + 10);
                        if (!new->human_readable || !new->compact_str)
                                return -ENOMEM;

                        qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp);
                }

                str = strtok_r(NULL, " ", &pos);
        }

        trace_seq_destroy(&seq);
        return 0;
}

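/*
 * kmem:mm_page_alloc handler: account the page in the live tree (and,
 * when not restricted to live pages, in the cumulative alloc tree),
 * then against its call site.
 */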
static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
                                                struct perf_sample *sample)
{
        u64 page;
        unsigned int order = perf_evsel__intval(evsel, sample, "order");
        unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags");
        unsigned int migrate_type = perf_evsel__intval(evsel, sample,
                                                       "migratetype");
        u64 bytes = kmem_page_size << order;
        u64 callsite;
        struct page_stat *pstat;
        struct page_stat this = {
                .order = order,
                .gfp_flags = gfp_flags,
                .migrate_type = migrate_type,
        };

        if (use_pfn)
                page = perf_evsel__intval(evsel, sample, "pfn");
        else
                page = perf_evsel__intval(evsel, sample, "page");

        nr_page_allocs++;
        total_page_alloc_bytes += bytes;

        if (!valid_page(page)) {
                nr_page_fails++;
                total_page_fail_bytes += bytes;

                return 0;
        }

        if (parse_gfp_flags(evsel, sample, gfp_flags) < 0)
                return -1;

        callsite = find_callsite(evsel, sample);

        /*
         * Track the page in the live tree so that the free event can
         * find it again with the correct gfp flags and migrate type.
         */
        this.page = page;
        pstat = page_stat__findnew_page(&this);
        if (pstat == NULL)
                return -ENOMEM;

        pstat->nr_alloc++;
        pstat->alloc_bytes += bytes;
        pstat->callsite = callsite;

        if (!live_page) {
                pstat = page_stat__findnew_alloc(&this);
                if (pstat == NULL)
                        return -ENOMEM;

                pstat->nr_alloc++;
                pstat->alloc_bytes += bytes;
                pstat->callsite = callsite;
        }

        this.callsite = callsite;
        pstat = page_stat__findnew_caller(&this);
        if (pstat == NULL)
                return -ENOMEM;

        pstat->nr_alloc++;
        pstat->alloc_bytes += bytes;

        order_stats[order][migrate_type]++;

        return 0;
}

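/*
 * kmem:mm_page_free handler: the free event carries only the page/pfn
 * and the order, so gfp flags, migrate type and call site are recovered
 * from the live tree entry before it is erased.  A miss there means the
 * allocation predates the trace.
 */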
static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
                                                struct perf_sample *sample)
{
        u64 page;
        unsigned int order = perf_evsel__intval(evsel, sample, "order");
        u64 bytes = kmem_page_size << order;
        struct page_stat *pstat;
        struct page_stat this = {
                .order = order,
        };

        if (use_pfn)
                page = perf_evsel__intval(evsel, sample, "pfn");
        else
                page = perf_evsel__intval(evsel, sample, "page");

        nr_page_frees++;
        total_page_free_bytes += bytes;

        this.page = page;
        pstat = page_stat__find_page(&this);
        if (pstat == NULL) {
                pr_debug2("free without matching alloc at page %"PRIx64" (order: %d)\n",
                          page, order);

                nr_page_nomatch++;
                total_page_nomatch_bytes += bytes;

                return 0;
        }

        this.gfp_flags = pstat->gfp_flags;
        this.migrate_type = pstat->migrate_type;
        this.callsite = pstat->callsite;

        rb_erase(&pstat->node, &page_live_tree);
        free(pstat);

        if (live_page) {
                order_stats[this.order][this.migrate_type]--;
        } else {
                pstat = page_stat__find_alloc(&this);
                if (pstat == NULL)
                        return -ENOMEM;

                pstat->nr_free++;
                pstat->free_bytes += bytes;
        }

        pstat = page_stat__find_caller(&this);
        if (pstat == NULL)
                return -ENOENT;

        pstat->nr_free++;
        pstat->free_bytes += bytes;

        if (live_page) {
                pstat->nr_alloc--;
                pstat->alloc_bytes -= bytes;

                if (pstat->nr_alloc == 0) {
                        rb_erase(&pstat->node, &page_caller_tree);
                        free(pstat);
                }
        }

        return 0;
}

static bool perf_kmem__skip_sample(struct perf_sample *sample)
{
        /* skip sample based on time? */
        if (perf_time__skip_sample(&ptime, sample->time))
                return true;

        return false;
}

typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
                                  struct perf_sample *sample);

static int process_sample_event(struct perf_tool *tool __maybe_unused,
                                union perf_event *event,
                                struct perf_sample *sample,
                                struct perf_evsel *evsel,
                                struct machine *machine)
{
        int err = 0;
        struct thread *thread = machine__findnew_thread(machine, sample->pid,
                                                        sample->tid);

        if (thread == NULL) {
                pr_debug("problem processing %d event, skipping it.\n",
                         event->header.type);
                return -1;
        }

        if (perf_kmem__skip_sample(sample))
                return 0;

        dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);

        if (evsel->handler != NULL) {
                tracepoint_handler f = evsel->handler;
                err = f(evsel, sample);
        }

        thread__put(thread);

        return err;
}

static struct perf_tool perf_kmem = {
        .sample          = process_sample_event,
        .comm            = perf_event__process_comm,
        .mmap            = perf_event__process_mmap,
        .mmap2           = perf_event__process_mmap2,
        .namespaces      = perf_event__process_namespaces,
        .ordered_events  = true,
};

static double fragmentation(unsigned long n_req, unsigned long n_alloc)
{
        if (n_alloc == 0)
                return 0.0;
        else
                return 100.0 - (100.0 * n_req / n_alloc);
}

static void __print_slab_result(struct rb_root *root,
                                struct perf_session *session,
                                int n_lines, int is_caller)
{
        struct rb_node *next;
        struct machine *machine = &session->machines.host;

        printf("%.105s\n", graph_dotted_line);
        printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
        printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
        printf("%.105s\n", graph_dotted_line);

        next = rb_first(root);

        while (next && n_lines--) {
                struct alloc_stat *data = rb_entry(next, struct alloc_stat,
                                                   node);
                struct symbol *sym = NULL;
                struct map *map;
                char buf[BUFSIZ];
                u64 addr;

                if (is_caller) {
                        addr = data->call_site;
                        if (!raw_ip)
                                sym = machine__find_kernel_function(machine, addr, &map);
                } else
                        addr = data->ptr;

                if (sym != NULL)
                        snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
                                 addr - map->unmap_ip(map, sym->start));
                else
                        snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
                printf(" %-34s |", buf);

                printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n",
                       (unsigned long long)data->bytes_alloc,
                       (unsigned long)data->bytes_alloc / data->hit,
                       (unsigned long long)data->bytes_req,
                       (unsigned long)data->bytes_req / data->hit,
                       (unsigned long)data->hit,
                       (unsigned long)data->pingpong,
                       fragmentation(data->bytes_req, data->bytes_alloc));

                next = rb_next(next);
        }

        if (n_lines == -1)
                printf(" ...                                | ...             | ...             | ...      | ...       | ...   \n");

        printf("%.105s\n", graph_dotted_line);
}

static const char * const migrate_type_str[] = {
        "UNMOVABL",
        "RECLAIM",
        "MOVABLE",
        "RESERVED",
        "CMA/ISLT",
        "UNKNOWN",
};

static void __print_page_alloc_result(struct perf_session *session, int n_lines)
{
        struct rb_node *next = rb_first(&page_alloc_sorted);
        struct machine *machine = &session->machines.host;
        const char *format;
        int gfp_len = max(strlen("GFP flags"), max_gfp_len);

        printf("\n%.105s\n", graph_dotted_line);
        printf(" %-16s | %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
               use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total",
               gfp_len, "GFP flags");
        printf("%.105s\n", graph_dotted_line);

        if (use_pfn)
                format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
        else
                format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";

        while (next && n_lines--) {
                struct page_stat *data;
                struct symbol *sym;
                struct map *map;
                char buf[32];
                char *caller = buf;

                data = rb_entry(next, struct page_stat, node);
                sym = machine__find_kernel_function(machine, data->callsite, &map);
                if (sym)
                        caller = sym->name;
                else
                        scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);

                printf(format, (unsigned long long)data->page,
                       (unsigned long long)data->alloc_bytes / 1024,
                       data->nr_alloc, data->order,
                       migrate_type_str[data->migrate_type],
                       gfp_len, compact_gfp_string(data->gfp_flags), caller);

                next = rb_next(next);
        }

        if (n_lines == -1) {
                printf(" ...              | ...              | ...       | ...   | ...      | %-*s | ...\n",
                       gfp_len, "...");
        }

        printf("%.105s\n", graph_dotted_line);
}

static void __print_page_caller_result(struct perf_session *session, int n_lines)
{
        struct rb_node *next = rb_first(&page_caller_sorted);
        struct machine *machine = &session->machines.host;
        int gfp_len = max(strlen("GFP flags"), max_gfp_len);

        printf("\n%.105s\n", graph_dotted_line);
        printf(" %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
               live_page ? "Live" : "Total", gfp_len, "GFP flags");
        printf("%.105s\n", graph_dotted_line);

        while (next && n_lines--) {
                struct page_stat *data;
                struct symbol *sym;
                struct map *map;
                char buf[32];
                char *caller = buf;

                data = rb_entry(next, struct page_stat, node);
                sym = machine__find_kernel_function(machine, data->callsite, &map);
                if (sym)
                        caller = sym->name;
                else
                        scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);

                printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n",
                       (unsigned long long)data->alloc_bytes / 1024,
                       data->nr_alloc, data->order,
                       migrate_type_str[data->migrate_type],
                       gfp_len, compact_gfp_string(data->gfp_flags), caller);

                next = rb_next(next);
        }

        if (n_lines == -1) {
                printf(" ...              | ...       | ...   | ...      | %-*s | ...\n",
                       gfp_len, "...");
        }

        printf("%.105s\n", graph_dotted_line);
}

static void print_gfp_flags(void)
{
        int i;

        printf("#\n");
        printf("# GFP flags\n");
        printf("# ---------\n");
        for (i = 0; i < nr_gfps; i++) {
                printf("# %08x: %*s: %s\n", gfps[i].flags,
                       (int) max_gfp_len, gfps[i].compact_str,
                       gfps[i].human_readable);
        }
}

static void print_slab_summary(void)
{
        printf("\nSUMMARY (SLAB allocator)");
        printf("\n========================\n");
        printf("Total bytes requested: %'lu\n", total_requested);
        printf("Total bytes allocated: %'lu\n", total_allocated);
        printf("Total bytes freed:     %'lu\n", total_freed);
        if (total_allocated > total_freed) {
                printf("Net total bytes allocated: %'lu\n",
                       total_allocated - total_freed);
        }
        printf("Total bytes wasted on internal fragmentation: %'lu\n",
               total_allocated - total_requested);
        printf("Internal fragmentation: %f%%\n",
               fragmentation(total_requested, total_allocated));
        printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs);
}

static void print_page_summary(void)
{
        int o, m;
        u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch;
        u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes;

        printf("\nSUMMARY (page allocator)");
        printf("\n========================\n");
        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation requests",
               nr_page_allocs, total_page_alloc_bytes / 1024);
        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free requests",
               nr_page_frees, total_page_free_bytes / 1024);
        printf("\n");

        printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
               nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
        printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
               nr_page_allocs - nr_alloc_freed,
               (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free-only requests",
               nr_page_nomatch, total_page_nomatch_bytes / 1024);
        printf("\n");

        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation failures",
               nr_page_fails, total_page_fail_bytes / 1024);
        printf("\n");

        printf("%5s  %12s  %12s  %12s  %12s  %12s\n", "Order",  "Unmovable",
               "Reclaimable", "Movable", "Reserved", "CMA/Isolated");
        printf("%.5s  %.12s  %.12s  %.12s  %.12s  %.12s\n", graph_dotted_line,
               graph_dotted_line, graph_dotted_line, graph_dotted_line,
               graph_dotted_line, graph_dotted_line);

        for (o = 0; o < MAX_PAGE_ORDER; o++) {
                printf("%5d", o);
                for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) {
                        if (order_stats[o][m])
                                printf("  %'12d", order_stats[o][m]);
                        else
                                printf("  %12c", '.');
                }
                printf("\n");
        }
}

static void print_slab_result(struct perf_session *session)
{
        if (caller_flag)
                __print_slab_result(&root_caller_sorted, session, caller_lines, 1);
        if (alloc_flag)
                __print_slab_result(&root_alloc_sorted, session, alloc_lines, 0);
        print_slab_summary();
}

static void print_page_result(struct perf_session *session)
{
        if (caller_flag || alloc_flag)
                print_gfp_flags();
        if (caller_flag)
                __print_page_caller_result(session, caller_lines);
        if (alloc_flag)
                __print_page_alloc_result(session, alloc_lines);
        print_page_summary();
}

static void print_result(struct perf_session *session)
{
        if (kmem_slab)
                print_slab_result(session);
        if (kmem_page)
                print_page_result(session);
}

static LIST_HEAD(slab_caller_sort);
static LIST_HEAD(slab_alloc_sort);
static LIST_HEAD(page_caller_sort);
static LIST_HEAD(page_alloc_sort);

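/*
 * Insert an entry into a sorted tree, comparing one sort dimension at a
 * time until one differs.  Entries that compare greater go to the left,
 * so an in-order walk visits the results in descending order.
 */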
static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
                             struct list_head *sort_list)
{
        struct rb_node **new = &(root->rb_node);
        struct rb_node *parent = NULL;
        struct sort_dimension *sort;

        while (*new) {
                struct alloc_stat *this;
                int cmp = 0;

                this = rb_entry(*new, struct alloc_stat, node);
                parent = *new;

                list_for_each_entry(sort, sort_list, list) {
                        cmp = sort->cmp(data, this);
                        if (cmp)
                                break;
                }

                if (cmp > 0)
                        new = &((*new)->rb_left);
                else
                        new = &((*new)->rb_right);
        }

        rb_link_node(&data->node, parent, new);
        rb_insert_color(&data->node, root);
}

static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted,
                               struct list_head *sort_list)
{
        struct rb_node *node;
        struct alloc_stat *data;

        for (;;) {
                node = rb_first(root);
                if (!node)
                        break;

                rb_erase(node, root);
                data = rb_entry(node, struct alloc_stat, node);
                sort_slab_insert(root_sorted, data, sort_list);
        }
}

static void sort_page_insert(struct rb_root *root, struct page_stat *data,
                             struct list_head *sort_list)
{
        struct rb_node **new = &root->rb_node;
        struct rb_node *parent = NULL;
        struct sort_dimension *sort;

        while (*new) {
                struct page_stat *this;
                int cmp = 0;

                this = rb_entry(*new, struct page_stat, node);
                parent = *new;

                list_for_each_entry(sort, sort_list, list) {
                        cmp = sort->cmp(data, this);
                        if (cmp)
                                break;
                }

                if (cmp > 0)
                        new = &parent->rb_left;
                else
                        new = &parent->rb_right;
        }

        rb_link_node(&data->node, parent, new);
        rb_insert_color(&data->node, root);
}

static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted,
                               struct list_head *sort_list)
{
        struct rb_node *node;
        struct page_stat *data;

        for (;;) {
                node = rb_first(root);
                if (!node)
                        break;

                rb_erase(node, root);
                data = rb_entry(node, struct page_stat, node);
                sort_page_insert(root_sorted, data, sort_list);
        }
}

static void sort_result(void)
{
        if (kmem_slab) {
                __sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
                                   &slab_alloc_sort);
                __sort_slab_result(&root_caller_stat, &root_caller_sorted,
                                   &slab_caller_sort);
        }
        if (kmem_page) {
                if (live_page)
                        __sort_page_result(&page_live_tree, &page_alloc_sorted,
                                           &page_alloc_sort);
                else
                        __sort_page_result(&page_alloc_tree, &page_alloc_sorted,
                                           &page_alloc_sort);

                __sort_page_result(&page_caller_tree, &page_caller_sorted,
                                   &page_caller_sort);
        }
}

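/*
 * Note on the "pfn" probe below: older kernels exposed a 'page' field
 * (a struct page pointer) in kmem:mm_page_alloc while newer ones expose
 * 'pfn', so check which field is present before processing events.
 */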
static int __cmd_kmem(struct perf_session *session)
{
        int err = -EINVAL;
        struct perf_evsel *evsel;
        const struct perf_evsel_str_handler kmem_tracepoints[] = {
                /* slab allocator */
                { "kmem:kmalloc",               perf_evsel__process_alloc_event, },
                { "kmem:kmem_cache_alloc",      perf_evsel__process_alloc_event, },
                { "kmem:kmalloc_node",          perf_evsel__process_alloc_node_event, },
                { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
                { "kmem:kfree",                 perf_evsel__process_free_event, },
                { "kmem:kmem_cache_free",       perf_evsel__process_free_event, },
                /* page allocator */
                { "kmem:mm_page_alloc",         perf_evsel__process_page_alloc_event, },
                { "kmem:mm_page_free",          perf_evsel__process_page_free_event, },
        };

        if (!perf_session__has_traces(session, "kmem record"))
                goto out;

        if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
                pr_err("Initializing perf session tracepoint handlers failed\n");
                goto out;
        }

        evlist__for_each_entry(session->evlist, evsel) {
                if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") &&
                    perf_evsel__field(evsel, "pfn")) {
                        use_pfn = true;
                        break;
                }
        }

        setup_pager();
        err = perf_session__process_events(session);
        if (err != 0) {
                pr_err("error during process events: %d\n", err);
                goto out;
        }
        sort_result();
        print_result(session);
out:
        return err;
}

/* slab sort keys */
static int ptr_cmp(void *a, void *b)
{
        struct alloc_stat *l = a;
        struct alloc_stat *r = b;

        if (l->ptr < r->ptr)
                return -1;
        else if (l->ptr > r->ptr)
                return 1;
        return 0;
}

static struct sort_dimension ptr_sort_dimension = {
        .name   = "ptr",
        .cmp    = ptr_cmp,
};

static int slab_callsite_cmp(void *a, void *b)
{
        struct alloc_stat *l = a;
        struct alloc_stat *r = b;

        if (l->call_site < r->call_site)
                return -1;
        else if (l->call_site > r->call_site)
                return 1;
        return 0;
}

static struct sort_dimension callsite_sort_dimension = {
        .name   = "callsite",
        .cmp    = slab_callsite_cmp,
};

static int hit_cmp(void *a, void *b)
{
        struct alloc_stat *l = a;
        struct alloc_stat *r = b;

        if (l->hit < r->hit)
                return -1;
        else if (l->hit > r->hit)
                return 1;
        return 0;
}

static struct sort_dimension hit_sort_dimension = {
        .name   = "hit",
        .cmp    = hit_cmp,
};

static int bytes_cmp(void *a, void *b)
{
        struct alloc_stat *l = a;
        struct alloc_stat *r = b;

        if (l->bytes_alloc < r->bytes_alloc)
                return -1;
        else if (l->bytes_alloc > r->bytes_alloc)
                return 1;
        return 0;
}

static struct sort_dimension bytes_sort_dimension = {
        .name   = "bytes",
        .cmp    = bytes_cmp,
};

static int frag_cmp(void *a, void *b)
{
        double x, y;
        struct alloc_stat *l = a;
        struct alloc_stat *r = b;

        x = fragmentation(l->bytes_req, l->bytes_alloc);
        y = fragmentation(r->bytes_req, r->bytes_alloc);

        if (x < y)
                return -1;
        else if (x > y)
                return 1;
        return 0;
}

static struct sort_dimension frag_sort_dimension = {
        .name   = "frag",
        .cmp    = frag_cmp,
};

static int pingpong_cmp(void *a, void *b)
{
        struct alloc_stat *l = a;
        struct alloc_stat *r = b;

        if (l->pingpong < r->pingpong)
                return -1;
        else if (l->pingpong > r->pingpong)
                return 1;
        return 0;
}

static struct sort_dimension pingpong_sort_dimension = {
        .name   = "pingpong",
        .cmp    = pingpong_cmp,
};

/* page sort keys */
static int page_cmp(void *a, void *b)
{
        struct page_stat *l = a;
        struct page_stat *r = b;

        if (l->page < r->page)
                return -1;
        else if (l->page > r->page)
                return 1;
        return 0;
}

static struct sort_dimension page_sort_dimension = {
        .name   = "page",
        .cmp    = page_cmp,
};

static int page_callsite_cmp(void *a, void *b)
{
        struct page_stat *l = a;
        struct page_stat *r = b;

        if (l->callsite < r->callsite)
                return -1;
        else if (l->callsite > r->callsite)
                return 1;
        return 0;
}

static struct sort_dimension page_callsite_sort_dimension = {
        .name   = "callsite",
        .cmp    = page_callsite_cmp,
};

static int page_hit_cmp(void *a, void *b)
{
        struct page_stat *l = a;
        struct page_stat *r = b;

        if (l->nr_alloc < r->nr_alloc)
                return -1;
        else if (l->nr_alloc > r->nr_alloc)
                return 1;
        return 0;
}

static struct sort_dimension page_hit_sort_dimension = {
        .name   = "hit",
        .cmp    = page_hit_cmp,
};

static int page_bytes_cmp(void *a, void *b)
{
        struct page_stat *l = a;
        struct page_stat *r = b;

        if (l->alloc_bytes < r->alloc_bytes)
                return -1;
        else if (l->alloc_bytes > r->alloc_bytes)
                return 1;
        return 0;
}

static struct sort_dimension page_bytes_sort_dimension = {
        .name   = "bytes",
        .cmp    = page_bytes_cmp,
};

static int page_order_cmp(void *a, void *b)
{
        struct page_stat *l = a;
        struct page_stat *r = b;

        if (l->order < r->order)
                return -1;
        else if (l->order > r->order)
                return 1;
        return 0;
}

static struct sort_dimension page_order_sort_dimension = {
        .name   = "order",
        .cmp    = page_order_cmp,
};

static int migrate_type_cmp(void *a, void *b)
{
        struct page_stat *l = a;
        struct page_stat *r = b;

        /*
         * A migrate type of -1U is used internally as a wildcard key
         * when looking up the allocation entry for a freed page.
         */
        if (l->migrate_type == -1U)
                return 0;

        if (l->migrate_type < r->migrate_type)
                return -1;
        else if (l->migrate_type > r->migrate_type)
                return 1;
        return 0;
}

static struct sort_dimension migrate_type_sort_dimension = {
        .name   = "migtype",
        .cmp    = migrate_type_cmp,
};

static int gfp_flags_cmp(void *a, void *b)
{
        struct page_stat *l = a;
        struct page_stat *r = b;

        /*
         * GFP flags of -1U are used internally as a wildcard key
         * when looking up the allocation entry for a freed page.
         */
        if (l->gfp_flags == -1U)
                return 0;

        if (l->gfp_flags < r->gfp_flags)
                return -1;
        else if (l->gfp_flags > r->gfp_flags)
                return 1;
        return 0;
}

static struct sort_dimension gfp_flags_sort_dimension = {
        .name   = "gfp",
        .cmp    = gfp_flags_cmp,
};

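/*
 * All keys accepted by the --sort option, for slab and page mode
 * respectively.  A key is looked up by name in
 * slab_sort_dimension__add()/page_sort_dimension__add() below.
 */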
static struct sort_dimension *slab_sorts[] = {
        &ptr_sort_dimension,
        &callsite_sort_dimension,
        &hit_sort_dimension,
        &bytes_sort_dimension,
        &frag_sort_dimension,
        &pingpong_sort_dimension,
};

static struct sort_dimension *page_sorts[] = {
        &page_sort_dimension,
        &page_callsite_sort_dimension,
        &page_hit_sort_dimension,
        &page_bytes_sort_dimension,
        &page_order_sort_dimension,
        &migrate_type_sort_dimension,
        &gfp_flags_sort_dimension,
};

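/*
 * Note: the matched dimension is memdup()'ed because the same key may
 * appear in more than one sort list (caller and alloc), and each copy
 * needs its own embedded list node.
 */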
static int slab_sort_dimension__add(const char *tok, struct list_head *list)
{
        struct sort_dimension *sort;
        int i;

        for (i = 0; i < (int)ARRAY_SIZE(slab_sorts); i++) {
                if (!strcmp(slab_sorts[i]->name, tok)) {
                        sort = memdup(slab_sorts[i], sizeof(*slab_sorts[i]));
                        if (!sort) {
                                pr_err("%s: memdup failed\n", __func__);
                                return -1;
                        }
                        list_add_tail(&sort->list, list);
                        return 0;
                }
        }

        return -1;
}

static int page_sort_dimension__add(const char *tok, struct list_head *list)
{
        struct sort_dimension *sort;
        int i;

        for (i = 0; i < (int)ARRAY_SIZE(page_sorts); i++) {
                if (!strcmp(page_sorts[i]->name, tok)) {
                        sort = memdup(page_sorts[i], sizeof(*page_sorts[i]));
                        if (!sort) {
                                pr_err("%s: memdup failed\n", __func__);
                                return -1;
                        }
                        list_add_tail(&sort->list, list);
                        return 0;
                }
        }

        return -1;
}

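/*
 * Parse a comma-separated list of sort keys (e.g. "frag,hit,bytes")
 * and append the matching dimensions to the given sort list.
 */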
static int setup_slab_sorting(struct list_head *sort_list, const char *arg)
{
        char *tok;
        char *str = strdup(arg);
        char *pos = str;

        if (!str) {
                pr_err("%s: strdup failed\n", __func__);
                return -1;
        }

        while (true) {
                tok = strsep(&pos, ",");
                if (!tok)
                        break;
                if (slab_sort_dimension__add(tok, sort_list) < 0) {
                        pr_err("Unknown slab --sort key: '%s'\n", tok);
                        free(str);
                        return -1;
                }
        }

        free(str);
        return 0;
}

static int setup_page_sorting(struct list_head *sort_list, const char *arg)
{
        char *tok;
        char *str = strdup(arg);
        char *pos = str;

        if (!str) {
                pr_err("%s: strdup failed\n", __func__);
                return -1;
        }

        while (true) {
                tok = strsep(&pos, ",");
                if (!tok)
                        break;
                if (page_sort_dimension__add(tok, sort_list) < 0) {
                        pr_err("Unknown page --sort key: '%s'\n", tok);
                        free(str);
                        return -1;
                }
        }

        free(str);
        return 0;
}

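/*
 * --sort applies to whichever view is in effect: page vs. slab mode is
 * decided by which of --page/--slab was given last (or by kmem_default
 * when neither is given), and caller vs. alloc output likewise by
 * which of --caller/--alloc was given last.
 */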
static int parse_sort_opt(const struct option *opt __maybe_unused,
                          const char *arg, int unset __maybe_unused)
{
        if (!arg)
                return -1;

        if (kmem_page > kmem_slab ||
            (kmem_page == 0 && kmem_slab == 0 && kmem_default == KMEM_PAGE)) {
                if (caller_flag > alloc_flag)
                        return setup_page_sorting(&page_caller_sort, arg);
                else
                        return setup_page_sorting(&page_alloc_sort, arg);
        } else {
                if (caller_flag > alloc_flag)
                        return setup_slab_sorting(&slab_caller_sort, arg);
                else
                        return setup_slab_sorting(&slab_alloc_sort, arg);
        }
}

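/*
 * "Last option wins" scheme: each parser raises its own flag above the
 * opposing one, so a later --caller beats an earlier --alloc (and vice
 * versa, likewise for --slab/--page).  The flags are only ever
 * compared against each other, never read as counts.
 */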
static int parse_caller_opt(const struct option *opt __maybe_unused,
                            const char *arg __maybe_unused,
                            int unset __maybe_unused)
{
        caller_flag = (alloc_flag + 1);
        return 0;
}

static int parse_alloc_opt(const struct option *opt __maybe_unused,
                           const char *arg __maybe_unused,
                           int unset __maybe_unused)
{
        alloc_flag = (caller_flag + 1);
        return 0;
}

static int parse_slab_opt(const struct option *opt __maybe_unused,
                          const char *arg __maybe_unused,
                          int unset __maybe_unused)
{
        kmem_slab = (kmem_page + 1);
        return 0;
}

static int parse_page_opt(const struct option *opt __maybe_unused,
                          const char *arg __maybe_unused,
                          int unset __maybe_unused)
{
        kmem_page = (kmem_slab + 1);
        return 0;
}

static int parse_line_opt(const struct option *opt __maybe_unused,
                          const char *arg, int unset __maybe_unused)
{
        int lines;

        if (!arg)
                return -1;

        lines = strtoul(arg, NULL, 10);

        if (caller_flag > alloc_flag)
                caller_lines = lines;
        else
                alloc_lines = lines;

        return 0;
}

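/*
 * Build an argv for the internal record command and hand it off, so
 * that e.g. "perf kmem record --slab <cmd>" effectively becomes:
 *
 *   perf record -a -R -c 1 -e kmem:kmalloc ... -e kmem:kmem_cache_free <cmd>
 *
 * Page mode additionally passes -g, since callchains are needed to
 * resolve the real allocation callsites.
 */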
static int __cmd_record(int argc, const char **argv)
{
        const char * const record_args[] = {
        "record", "-a", "-R", "-c", "1",
        };
        const char * const slab_events[] = {
        "-e", "kmem:kmalloc",
        "-e", "kmem:kmalloc_node",
        "-e", "kmem:kfree",
        "-e", "kmem:kmem_cache_alloc",
        "-e", "kmem:kmem_cache_alloc_node",
        "-e", "kmem:kmem_cache_free",
        };
        const char * const page_events[] = {
        "-e", "kmem:mm_page_alloc",
        "-e", "kmem:mm_page_free",
        };
        unsigned int rec_argc, i, j;
        const char **rec_argv;

        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
        if (kmem_slab)
                rec_argc += ARRAY_SIZE(slab_events);
        if (kmem_page)
                rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */

        rec_argv = calloc(rec_argc + 1, sizeof(char *));

        if (rec_argv == NULL)
                return -ENOMEM;

        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = strdup(record_args[i]);

        if (kmem_slab) {
                for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++)
                        rec_argv[i] = strdup(slab_events[j]);
        }
        if (kmem_page) {
                rec_argv[i++] = strdup("-g");

                for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
                        rec_argv[i] = strdup(page_events[j]);
        }

        for (j = 1; j < (unsigned int)argc; j++, i++)
                rec_argv[i] = argv[j];

        return cmd_record(i, rec_argv);
}

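/*
 * Honour the 'kmem.default' config key, which selects the mode used
 * when neither --slab nor --page is given, e.g. in ~/.perfconfig:
 *
 *   [kmem]
 *           default = page
 */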
static int kmem_config(const char *var, const char *value, void *cb __maybe_unused)
{
        if (!strcmp(var, "kmem.default")) {
                if (!strcmp(value, "slab"))
                        kmem_default = KMEM_SLAB;
                else if (!strcmp(value, "page"))
                        kmem_default = KMEM_PAGE;
                else
                        pr_err("invalid default value ('slab' or 'page' required): %s\n",
                               value);
                return 0;
        }

        return 0;
}

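/*
 * Typical usage (illustrative):
 *
 *   perf kmem record --page sleep 10    # record page allocator events
 *   perf kmem stat --caller --sort bytes,hit
 */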
int cmd_kmem(int argc, const char **argv)
{
        const char * const default_slab_sort = "frag,hit,bytes";
        const char * const default_page_sort = "bytes,hit";
        struct perf_data_file file = {
                .mode = PERF_DATA_MODE_READ,
        };
        const struct option kmem_options[] = {
        OPT_STRING('i', "input", &input_name, "file", "input file name"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show symbol address, etc)"),
        OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
                           "show per-callsite statistics", parse_caller_opt),
        OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
                           "show per-allocation statistics", parse_alloc_opt),
        OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
                     "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, "
                     "page, order, migtype, gfp", parse_sort_opt),
        OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
        OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
        OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
        OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator",
                           parse_slab_opt),
        OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
                           parse_page_opt),
        OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"),
        OPT_STRING(0, "time", &time_str, "str",
                   "Time span of interest (start,stop)"),
        OPT_END()
        };
        const char *const kmem_subcommands[] = { "record", "stat", NULL };
        const char *kmem_usage[] = {
                NULL,
                NULL
        };
        struct perf_session *session;
        const char errmsg[] = "No %s allocation events found.  Have you run 'perf kmem record --%s'?\n";
        int ret = perf_config(kmem_config, NULL);

        if (ret)
                return ret;

        argc = parse_options_subcommand(argc, argv, kmem_options,
                                        kmem_subcommands, kmem_usage, 0);

        if (!argc)
                usage_with_options(kmem_usage, kmem_options);

        if (kmem_slab == 0 && kmem_page == 0) {
                if (kmem_default == KMEM_SLAB)
                        kmem_slab = 1;
                else
                        kmem_page = 1;
        }

        if (!strncmp(argv[0], "rec", 3)) {
                symbol__init(NULL);
                return __cmd_record(argc, argv);
        }

        file.path = input_name;

        kmem_session = session = perf_session__new(&file, false, &perf_kmem);
        if (session == NULL)
                return -1;

        ret = -1;

        if (kmem_slab) {
                if (!perf_evlist__find_tracepoint_by_name(session->evlist,
                                                          "kmem:kmalloc")) {
                        pr_err(errmsg, "slab", "slab");
                        goto out_delete;
                }
        }

        if (kmem_page) {
                struct perf_evsel *evsel;

                evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
                                                             "kmem:mm_page_alloc");
                if (evsel == NULL) {
                        pr_err(errmsg, "page", "page");
                        goto out_delete;
                }

                kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent);
                symbol_conf.use_callchain = true;
        }

        symbol__init(&session->header.env);

        if (perf_time__parse_str(&ptime, time_str) != 0) {
                pr_err("Invalid time string\n");
                ret = -EINVAL;
                goto out_delete;
        }

        if (!strcmp(argv[0], "stat")) {
                setlocale(LC_ALL, "");

                if (cpu__setup_cpunode_map())
                        goto out_delete;

                if (list_empty(&slab_caller_sort))
                        setup_slab_sorting(&slab_caller_sort, default_slab_sort);
                if (list_empty(&slab_alloc_sort))
                        setup_slab_sorting(&slab_alloc_sort, default_slab_sort);
                if (list_empty(&page_caller_sort))
                        setup_page_sorting(&page_caller_sort, default_page_sort);
                if (list_empty(&page_alloc_sort))
                        setup_page_sorting(&page_alloc_sort, default_page_sort);

                if (kmem_page) {
                        setup_page_sorting(&page_alloc_sort_input,
                                           "page,order,migtype,gfp");
                        setup_page_sorting(&page_caller_sort_input,
                                           "callsite,order,migtype,gfp");
                }
                ret = __cmd_kmem(session);
        } else
                usage_with_options(kmem_usage, kmem_options);

out_delete:
        perf_session__delete(session);

        return ret;
}