linux/tools/perf/builtin-kmem.c
   1// SPDX-License-Identifier: GPL-2.0
   2#include "builtin.h"
   3#include "perf.h"
   4
   5#include "util/evlist.h"
   6#include "util/evsel.h"
   7#include "util/util.h"
   8#include "util/config.h"
   9#include "util/symbol.h"
  10#include "util/thread.h"
  11#include "util/header.h"
  12#include "util/session.h"
  13#include "util/tool.h"
  14#include "util/callchain.h"
  15#include "util/time-utils.h"
  16
  17#include <subcmd/parse-options.h>
  18#include "util/trace-event.h"
  19#include "util/data.h"
  20#include "util/cpumap.h"
  21
  22#include "util/debug.h"
  23
  24#include <linux/kernel.h>
  25#include <linux/rbtree.h>
  26#include <linux/string.h>
  27#include <errno.h>
  28#include <inttypes.h>
  29#include <locale.h>
  30#include <regex.h>
  31
  32#include "sane_ctype.h"
  33
  34static int      kmem_slab;
  35static int      kmem_page;
  36
  37static long     kmem_page_size;
  38static enum {
  39        KMEM_SLAB,
  40        KMEM_PAGE,
  41} kmem_default = KMEM_SLAB;  /* for backward compatibility */
  42
  43struct alloc_stat;
  44typedef int (*sort_fn_t)(void *, void *);
  45
  46static int                      alloc_flag;
  47static int                      caller_flag;
  48
  49static int                      alloc_lines = -1;
  50static int                      caller_lines = -1;
  51
  52static bool                     raw_ip;
  53
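/*
 * Per-pointer / per-callsite slab allocation statistics, kept in the
 * rbtrees declared below.  'pingpong' counts frees that happened on a
 * different CPU than the matching allocation.
 */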
  54struct alloc_stat {
  55        u64     call_site;
  56        u64     ptr;
  57        u64     bytes_req;
  58        u64     bytes_alloc;
  59        u64     last_alloc;
  60        u32     hit;
  61        u32     pingpong;
  62
  63        short   alloc_cpu;
  64
  65        struct rb_node node;
  66};
  67
  68static struct rb_root root_alloc_stat;
  69static struct rb_root root_alloc_sorted;
  70static struct rb_root root_caller_stat;
  71static struct rb_root root_caller_sorted;
  72
  73static unsigned long total_requested, total_allocated, total_freed;
  74static unsigned long nr_allocs, nr_cross_allocs;
  75
   76/* filters controlling the start and end of the analysis time window */
  77static struct perf_time_interval ptime;
  78const char *time_str;
  79
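/*
 * Account one slab allocation in the per-pointer tree: walk the rbtree
 * keyed by 'ptr', bump the node if the pointer is already tracked,
 * otherwise allocate and link a new node.
 */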
  80static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
  81                             int bytes_req, int bytes_alloc, int cpu)
  82{
  83        struct rb_node **node = &root_alloc_stat.rb_node;
  84        struct rb_node *parent = NULL;
  85        struct alloc_stat *data = NULL;
  86
  87        while (*node) {
  88                parent = *node;
  89                data = rb_entry(*node, struct alloc_stat, node);
  90
  91                if (ptr > data->ptr)
  92                        node = &(*node)->rb_right;
  93                else if (ptr < data->ptr)
  94                        node = &(*node)->rb_left;
  95                else
  96                        break;
  97        }
  98
  99        if (data && data->ptr == ptr) {
 100                data->hit++;
 101                data->bytes_req += bytes_req;
 102                data->bytes_alloc += bytes_alloc;
 103        } else {
 104                data = malloc(sizeof(*data));
 105                if (!data) {
 106                        pr_err("%s: malloc failed\n", __func__);
 107                        return -1;
 108                }
 109                data->ptr = ptr;
 110                data->pingpong = 0;
 111                data->hit = 1;
 112                data->bytes_req = bytes_req;
 113                data->bytes_alloc = bytes_alloc;
 114
 115                rb_link_node(&data->node, parent, node);
 116                rb_insert_color(&data->node, &root_alloc_stat);
 117        }
 118        data->call_site = call_site;
 119        data->alloc_cpu = cpu;
 120        data->last_alloc = bytes_alloc;
 121
 122        return 0;
 123}
 124
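/* Same as insert_alloc_stat(), but aggregated by call site. */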
 125static int insert_caller_stat(unsigned long call_site,
 126                              int bytes_req, int bytes_alloc)
 127{
 128        struct rb_node **node = &root_caller_stat.rb_node;
 129        struct rb_node *parent = NULL;
 130        struct alloc_stat *data = NULL;
 131
 132        while (*node) {
 133                parent = *node;
 134                data = rb_entry(*node, struct alloc_stat, node);
 135
 136                if (call_site > data->call_site)
 137                        node = &(*node)->rb_right;
 138                else if (call_site < data->call_site)
 139                        node = &(*node)->rb_left;
 140                else
 141                        break;
 142        }
 143
 144        if (data && data->call_site == call_site) {
 145                data->hit++;
 146                data->bytes_req += bytes_req;
 147                data->bytes_alloc += bytes_alloc;
 148        } else {
 149                data = malloc(sizeof(*data));
 150                if (!data) {
 151                        pr_err("%s: malloc failed\n", __func__);
 152                        return -1;
 153                }
 154                data->call_site = call_site;
 155                data->pingpong = 0;
 156                data->hit = 1;
 157                data->bytes_req = bytes_req;
 158                data->bytes_alloc = bytes_alloc;
 159
 160                rb_link_node(&data->node, parent, node);
 161                rb_insert_color(&data->node, &root_caller_stat);
 162        }
 163
 164        return 0;
 165}
 166
 167static int perf_evsel__process_alloc_event(struct perf_evsel *evsel,
 168                                           struct perf_sample *sample)
 169{
 170        unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"),
 171                      call_site = perf_evsel__intval(evsel, sample, "call_site");
 172        int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"),
 173            bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");
 174
 175        if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
 176            insert_caller_stat(call_site, bytes_req, bytes_alloc))
 177                return -1;
 178
 179        total_requested += bytes_req;
 180        total_allocated += bytes_alloc;
 181
 182        nr_allocs++;
 183        return 0;
 184}
 185
 186static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
 187                                                struct perf_sample *sample)
 188{
 189        int ret = perf_evsel__process_alloc_event(evsel, sample);
 190
 191        if (!ret) {
 192                int node1 = cpu__get_node(sample->cpu),
 193                    node2 = perf_evsel__intval(evsel, sample, "node");
 194
 195                if (node1 != node2)
 196                        nr_cross_allocs++;
 197        }
 198
 199        return ret;
 200}
 201
 202static int ptr_cmp(void *, void *);
 203static int slab_callsite_cmp(void *, void *);
 204
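/*
 * Generic rbtree lookup: 'sort_fn' decides whether the key is the
 * pointer or the call site, so one walker serves both trees.
 */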
 205static struct alloc_stat *search_alloc_stat(unsigned long ptr,
 206                                            unsigned long call_site,
 207                                            struct rb_root *root,
 208                                            sort_fn_t sort_fn)
 209{
 210        struct rb_node *node = root->rb_node;
 211        struct alloc_stat key = { .ptr = ptr, .call_site = call_site };
 212
 213        while (node) {
 214                struct alloc_stat *data;
 215                int cmp;
 216
 217                data = rb_entry(node, struct alloc_stat, node);
 218
 219                cmp = sort_fn(&key, data);
 220                if (cmp < 0)
 221                        node = node->rb_left;
 222                else if (cmp > 0)
 223                        node = node->rb_right;
 224                else
 225                        return data;
 226        }
 227        return NULL;
 228}
 229
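/*
 * A free seen on a different CPU than the allocation is a "ping-pong"
 * and is counted against both the pointer and its call site.
 */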
 230static int perf_evsel__process_free_event(struct perf_evsel *evsel,
 231                                          struct perf_sample *sample)
 232{
 233        unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
 234        struct alloc_stat *s_alloc, *s_caller;
 235
 236        s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
 237        if (!s_alloc)
 238                return 0;
 239
 240        total_freed += s_alloc->last_alloc;
 241
 242        if ((short)sample->cpu != s_alloc->alloc_cpu) {
 243                s_alloc->pingpong++;
 244
 245                s_caller = search_alloc_stat(0, s_alloc->call_site,
 246                                             &root_caller_stat,
 247                                             slab_callsite_cmp);
 248                if (!s_caller)
 249                        return -1;
 250                s_caller->pingpong++;
 251        }
 252        s_alloc->alloc_cpu = -1;
 253
 254        return 0;
 255}
 256
 257static u64 total_page_alloc_bytes;
 258static u64 total_page_free_bytes;
 259static u64 total_page_nomatch_bytes;
 260static u64 total_page_fail_bytes;
 261static unsigned long nr_page_allocs;
 262static unsigned long nr_page_frees;
 263static unsigned long nr_page_fails;
 264static unsigned long nr_page_nomatch;
 265
 266static bool use_pfn;
 267static bool live_page;
 268static struct perf_session *kmem_session;
 269
 270#define MAX_MIGRATE_TYPES  6
 271#define MAX_PAGE_ORDER     11
 272
 273static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];
 274
 275struct page_stat {
 276        struct rb_node  node;
 277        u64             page;
 278        u64             callsite;
 279        int             order;
 280        unsigned        gfp_flags;
 281        unsigned        migrate_type;
 282        u64             alloc_bytes;
 283        u64             free_bytes;
 284        int             nr_alloc;
 285        int             nr_free;
 286};
 287
 288static struct rb_root page_live_tree;
 289static struct rb_root page_alloc_tree;
 290static struct rb_root page_alloc_sorted;
 291static struct rb_root page_caller_tree;
 292static struct rb_root page_caller_sorted;
 293
 294struct alloc_func {
 295        u64 start;
 296        u64 end;
 297        char *name;
 298};
 299
 300static int nr_alloc_funcs;
 301static struct alloc_func *alloc_func_list;
 302
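/*
 * funcmp() gives qsort() a total order on start address; callcmp() lets
 * bsearch() treat a key whose address lies inside [start, end) as equal.
 */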
 303static int funcmp(const void *a, const void *b)
 304{
 305        const struct alloc_func *fa = a;
 306        const struct alloc_func *fb = b;
 307
 308        if (fa->start > fb->start)
 309                return 1;
 310        else
 311                return -1;
 312}
 313
 314static int callcmp(const void *a, const void *b)
 315{
 316        const struct alloc_func *fa = a;
 317        const struct alloc_func *fb = b;
 318
 319        if (fb->start <= fa->start && fa->end < fb->end)
 320                return 0;
 321
 322        if (fa->start > fb->start)
 323                return 1;
 324        else
 325                return -1;
 326}
 327
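/*
 * Collect every kernel symbol that looks like a page allocation entry
 * point (e.g. __alloc_pages, __get_free_pages, get_zeroed_page) into a
 * table sorted by address, so that find_callsite() can skip such frames.
 */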
 328static int build_alloc_func_list(void)
 329{
 330        int ret;
 331        struct map *kernel_map;
 332        struct symbol *sym;
 333        struct rb_node *node;
 334        struct alloc_func *func;
 335        struct machine *machine = &kmem_session->machines.host;
 336        regex_t alloc_func_regex;
 337        const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?";
 338
 339        ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED);
 340        if (ret) {
 341                char err[BUFSIZ];
 342
 343                regerror(ret, &alloc_func_regex, err, sizeof(err));
 344                pr_err("Invalid regex: %s\n%s", pattern, err);
 345                return -EINVAL;
 346        }
 347
 348        kernel_map = machine__kernel_map(machine);
 349        if (map__load(kernel_map) < 0) {
 350                pr_err("cannot load kernel map\n");
 351                return -ENOENT;
 352        }
 353
 354        map__for_each_symbol(kernel_map, sym, node) {
 355                if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0))
 356                        continue;
 357
 358                func = realloc(alloc_func_list,
 359                               (nr_alloc_funcs + 1) * sizeof(*func));
 360                if (func == NULL)
 361                        return -ENOMEM;
 362
 363                pr_debug("alloc func: %s\n", sym->name);
 364                func[nr_alloc_funcs].start = sym->start;
 365                func[nr_alloc_funcs].end   = sym->end;
 366                func[nr_alloc_funcs].name  = sym->name;
 367
 368                alloc_func_list = func;
 369                nr_alloc_funcs++;
 370        }
 371
 372        qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp);
 373
 374        regfree(&alloc_func_regex);
 375        return 0;
 376}
 377
  378/*
  379 * Find the first non-allocation function in the callchain; that frame
  380 * is the real call site.  The allocation functions are those collected in
  381 * 'alloc_func_list'.
  382 */
 382static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample)
 383{
 384        struct addr_location al;
 385        struct machine *machine = &kmem_session->machines.host;
 386        struct callchain_cursor_node *node;
 387
 388        if (alloc_func_list == NULL) {
 389                if (build_alloc_func_list() < 0)
 390                        goto out;
 391        }
 392
 393        al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
 394        sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);
 395
 396        callchain_cursor_commit(&callchain_cursor);
 397        while (true) {
 398                struct alloc_func key, *caller;
 399                u64 addr;
 400
 401                node = callchain_cursor_current(&callchain_cursor);
 402                if (node == NULL)
 403                        break;
 404
 405                key.start = key.end = node->ip;
 406                caller = bsearch(&key, alloc_func_list, nr_alloc_funcs,
 407                                 sizeof(key), callcmp);
 408                if (!caller) {
  409                        /* not an alloc function - this is the call site */
 410                        if (node->map)
 411                                addr = map__unmap_ip(node->map, node->ip);
 412                        else
 413                                addr = node->ip;
 414
 415                        return addr;
 416                } else
 417                        pr_debug3("skipping alloc function: %s\n", caller->name);
 418
 419                callchain_cursor_advance(&callchain_cursor);
 420        }
 421
 422out:
 423        pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip);
 424        return sample->ip;
 425}
 426
 427struct sort_dimension {
 428        const char              name[20];
 429        sort_fn_t               cmp;
 430        struct list_head        list;
 431};
 432
 433static LIST_HEAD(page_alloc_sort_input);
 434static LIST_HEAD(page_caller_sort_input);
 435
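/*
 * Page statistics trees: page_live_tree is keyed by page/pfn and holds
 * pages not freed yet; page_alloc_tree and page_caller_tree are ordered
 * by the sort-key lists above.
 */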
 436static struct page_stat *
 437__page_stat__findnew_page(struct page_stat *pstat, bool create)
 438{
 439        struct rb_node **node = &page_live_tree.rb_node;
 440        struct rb_node *parent = NULL;
 441        struct page_stat *data;
 442
 443        while (*node) {
 444                s64 cmp;
 445
 446                parent = *node;
 447                data = rb_entry(*node, struct page_stat, node);
 448
 449                cmp = data->page - pstat->page;
 450                if (cmp < 0)
 451                        node = &parent->rb_left;
 452                else if (cmp > 0)
 453                        node = &parent->rb_right;
 454                else
 455                        return data;
 456        }
 457
 458        if (!create)
 459                return NULL;
 460
 461        data = zalloc(sizeof(*data));
 462        if (data != NULL) {
 463                data->page = pstat->page;
 464                data->order = pstat->order;
 465                data->gfp_flags = pstat->gfp_flags;
 466                data->migrate_type = pstat->migrate_type;
 467
 468                rb_link_node(&data->node, parent, node);
 469                rb_insert_color(&data->node, &page_live_tree);
 470        }
 471
 472        return data;
 473}
 474
 475static struct page_stat *page_stat__find_page(struct page_stat *pstat)
 476{
 477        return __page_stat__findnew_page(pstat, false);
 478}
 479
 480static struct page_stat *page_stat__findnew_page(struct page_stat *pstat)
 481{
 482        return __page_stat__findnew_page(pstat, true);
 483}
 484
 485static struct page_stat *
 486__page_stat__findnew_alloc(struct page_stat *pstat, bool create)
 487{
 488        struct rb_node **node = &page_alloc_tree.rb_node;
 489        struct rb_node *parent = NULL;
 490        struct page_stat *data;
 491        struct sort_dimension *sort;
 492
 493        while (*node) {
 494                int cmp = 0;
 495
 496                parent = *node;
 497                data = rb_entry(*node, struct page_stat, node);
 498
 499                list_for_each_entry(sort, &page_alloc_sort_input, list) {
 500                        cmp = sort->cmp(pstat, data);
 501                        if (cmp)
 502                                break;
 503                }
 504
 505                if (cmp < 0)
 506                        node = &parent->rb_left;
 507                else if (cmp > 0)
 508                        node = &parent->rb_right;
 509                else
 510                        return data;
 511        }
 512
 513        if (!create)
 514                return NULL;
 515
 516        data = zalloc(sizeof(*data));
 517        if (data != NULL) {
 518                data->page = pstat->page;
 519                data->order = pstat->order;
 520                data->gfp_flags = pstat->gfp_flags;
 521                data->migrate_type = pstat->migrate_type;
 522
 523                rb_link_node(&data->node, parent, node);
 524                rb_insert_color(&data->node, &page_alloc_tree);
 525        }
 526
 527        return data;
 528}
 529
 530static struct page_stat *page_stat__find_alloc(struct page_stat *pstat)
 531{
 532        return __page_stat__findnew_alloc(pstat, false);
 533}
 534
 535static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat)
 536{
 537        return __page_stat__findnew_alloc(pstat, true);
 538}
 539
 540static struct page_stat *
 541__page_stat__findnew_caller(struct page_stat *pstat, bool create)
 542{
 543        struct rb_node **node = &page_caller_tree.rb_node;
 544        struct rb_node *parent = NULL;
 545        struct page_stat *data;
 546        struct sort_dimension *sort;
 547
 548        while (*node) {
 549                int cmp = 0;
 550
 551                parent = *node;
 552                data = rb_entry(*node, struct page_stat, node);
 553
 554                list_for_each_entry(sort, &page_caller_sort_input, list) {
 555                        cmp = sort->cmp(pstat, data);
 556                        if (cmp)
 557                                break;
 558                }
 559
 560                if (cmp < 0)
 561                        node = &parent->rb_left;
 562                else if (cmp > 0)
 563                        node = &parent->rb_right;
 564                else
 565                        return data;
 566        }
 567
 568        if (!create)
 569                return NULL;
 570
 571        data = zalloc(sizeof(*data));
 572        if (data != NULL) {
 573                data->callsite = pstat->callsite;
 574                data->order = pstat->order;
 575                data->gfp_flags = pstat->gfp_flags;
 576                data->migrate_type = pstat->migrate_type;
 577
 578                rb_link_node(&data->node, parent, node);
 579                rb_insert_color(&data->node, &page_caller_tree);
 580        }
 581
 582        return data;
 583}
 584
 585static struct page_stat *page_stat__find_caller(struct page_stat *pstat)
 586{
 587        return __page_stat__findnew_caller(pstat, false);
 588}
 589
 590static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat)
 591{
 592        return __page_stat__findnew_caller(pstat, true);
 593}
 594
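/*
 * A failed allocation shows up as pfn == -1, or as a NULL page pointer
 * when the event does not carry a pfn.
 */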
 595static bool valid_page(u64 pfn_or_page)
 596{
 597        if (use_pfn && pfn_or_page == -1UL)
 598                return false;
 599        if (!use_pfn && pfn_or_page == 0)
 600                return false;
 601        return true;
 602}
 603
 604struct gfp_flag {
 605        unsigned int flags;
 606        char *compact_str;
 607        char *human_readable;
 608};
 609
 610static struct gfp_flag *gfps;
 611static int nr_gfps;
 612
 613static int gfpcmp(const void *a, const void *b)
 614{
 615        const struct gfp_flag *fa = a;
 616        const struct gfp_flag *fb = b;
 617
 618        return fa->flags - fb->flags;
 619}
 620
 621/* see include/trace/events/mmflags.h */
 622static const struct {
 623        const char *original;
 624        const char *compact;
 625} gfp_compact_table[] = {
 626        { "GFP_TRANSHUGE",              "THP" },
 627        { "GFP_TRANSHUGE_LIGHT",        "THL" },
 628        { "GFP_HIGHUSER_MOVABLE",       "HUM" },
 629        { "GFP_HIGHUSER",               "HU" },
 630        { "GFP_USER",                   "U" },
 631        { "GFP_KERNEL_ACCOUNT",         "KAC" },
 632        { "GFP_KERNEL",                 "K" },
 633        { "GFP_NOFS",                   "NF" },
 634        { "GFP_ATOMIC",                 "A" },
 635        { "GFP_NOIO",                   "NI" },
 636        { "GFP_NOWAIT",                 "NW" },
 637        { "GFP_DMA",                    "D" },
 638        { "__GFP_HIGHMEM",              "HM" },
 639        { "GFP_DMA32",                  "D32" },
 640        { "__GFP_HIGH",                 "H" },
 641        { "__GFP_ATOMIC",               "_A" },
 642        { "__GFP_IO",                   "I" },
 643        { "__GFP_FS",                   "F" },
 644        { "__GFP_NOWARN",               "NWR" },
 645        { "__GFP_RETRY_MAYFAIL",        "R" },
 646        { "__GFP_NOFAIL",               "NF" },
 647        { "__GFP_NORETRY",              "NR" },
 648        { "__GFP_COMP",                 "C" },
 649        { "__GFP_ZERO",                 "Z" },
 650        { "__GFP_NOMEMALLOC",           "NMA" },
 651        { "__GFP_MEMALLOC",             "MA" },
 652        { "__GFP_HARDWALL",             "HW" },
 653        { "__GFP_THISNODE",             "TN" },
 654        { "__GFP_RECLAIMABLE",          "RC" },
 655        { "__GFP_MOVABLE",              "M" },
 656        { "__GFP_ACCOUNT",              "AC" },
 657        { "__GFP_WRITE",                "WR" },
 658        { "__GFP_RECLAIM",              "R" },
 659        { "__GFP_DIRECT_RECLAIM",       "DR" },
 660        { "__GFP_KSWAPD_RECLAIM",       "KR" },
 661};
 662
 663static size_t max_gfp_len;
 664
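/*
 * Compress a printed gfp string using the table above, e.g.
 * "GFP_KERNEL|__GFP_ZERO" becomes "K|Z".  max_gfp_len tracks the widest
 * result for later column formatting.
 */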
 665static char *compact_gfp_flags(char *gfp_flags)
 666{
 667        char *orig_flags = strdup(gfp_flags);
 668        char *new_flags = NULL;
 669        char *str, *pos = NULL;
 670        size_t len = 0;
 671
 672        if (orig_flags == NULL)
 673                return NULL;
 674
 675        str = strtok_r(orig_flags, "|", &pos);
 676        while (str) {
 677                size_t i;
 678                char *new;
 679                const char *cpt;
 680
 681                for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) {
 682                        if (strcmp(gfp_compact_table[i].original, str))
 683                                continue;
 684
 685                        cpt = gfp_compact_table[i].compact;
 686                        new = realloc(new_flags, len + strlen(cpt) + 2);
 687                        if (new == NULL) {
 688                                free(new_flags);
 689                                return NULL;
 690                        }
 691
 692                        new_flags = new;
 693
 694                        if (!len) {
 695                                strcpy(new_flags, cpt);
 696                        } else {
 697                                strcat(new_flags, "|");
 698                                strcat(new_flags, cpt);
 699                                len++;
 700                        }
 701
 702                        len += strlen(cpt);
 703                }
 704
 705                str = strtok_r(NULL, "|", &pos);
 706        }
 707
 708        if (max_gfp_len < len)
 709                max_gfp_len = len;
 710
 711        free(orig_flags);
 712        return new_flags;
 713}
 714
 715static char *compact_gfp_string(unsigned long gfp_flags)
 716{
 717        struct gfp_flag key = {
 718                .flags = gfp_flags,
 719        };
 720        struct gfp_flag *gfp;
 721
 722        gfp = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp);
 723        if (gfp)
 724                return gfp->compact_str;
 725
 726        return NULL;
 727}
 728
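/*
 * Build the gfps table lazily: the first time a flag combination is
 * seen, pretty-print the raw event, pick out the "gfp_flags=" token and
 * remember both its full and compact spellings.
 */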
 729static int parse_gfp_flags(struct perf_evsel *evsel, struct perf_sample *sample,
 730                           unsigned int gfp_flags)
 731{
 732        struct pevent_record record = {
 733                .cpu = sample->cpu,
 734                .data = sample->raw_data,
 735                .size = sample->raw_size,
 736        };
 737        struct trace_seq seq;
 738        char *str, *pos = NULL;
 739
 740        if (nr_gfps) {
 741                struct gfp_flag key = {
 742                        .flags = gfp_flags,
 743                };
 744
 745                if (bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp))
 746                        return 0;
 747        }
 748
 749        trace_seq_init(&seq);
 750        pevent_event_info(&seq, evsel->tp_format, &record);
 751
 752        str = strtok_r(seq.buffer, " ", &pos);
 753        while (str) {
 754                if (!strncmp(str, "gfp_flags=", 10)) {
 755                        struct gfp_flag *new;
 756
 757                        new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps));
 758                        if (new == NULL)
 759                                return -ENOMEM;
 760
 761                        gfps = new;
 762                        new += nr_gfps++;
 763
 764                        new->flags = gfp_flags;
 765                        new->human_readable = strdup(str + 10);
 766                        new->compact_str = compact_gfp_flags(str + 10);
 767                        if (!new->human_readable || !new->compact_str)
 768                                return -ENOMEM;
 769
 770                        qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp);
 771                }
 772
 773                str = strtok_r(NULL, " ", &pos);
 774        }
 775
 776        trace_seq_destroy(&seq);
 777        return 0;
 778}
 779
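/*
 * One mm_page_alloc event accounts kmem_page_size << order bytes in the
 * live-page tree, the caller tree and (unless only live pages are
 * reported) the alloc tree.
 */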
 780static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
 781                                                struct perf_sample *sample)
 782{
 783        u64 page;
 784        unsigned int order = perf_evsel__intval(evsel, sample, "order");
 785        unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags");
 786        unsigned int migrate_type = perf_evsel__intval(evsel, sample,
 787                                                       "migratetype");
 788        u64 bytes = kmem_page_size << order;
 789        u64 callsite;
 790        struct page_stat *pstat;
 791        struct page_stat this = {
 792                .order = order,
 793                .gfp_flags = gfp_flags,
 794                .migrate_type = migrate_type,
 795        };
 796
 797        if (use_pfn)
 798                page = perf_evsel__intval(evsel, sample, "pfn");
 799        else
 800                page = perf_evsel__intval(evsel, sample, "page");
 801
 802        nr_page_allocs++;
 803        total_page_alloc_bytes += bytes;
 804
 805        if (!valid_page(page)) {
 806                nr_page_fails++;
 807                total_page_fail_bytes += bytes;
 808
 809                return 0;
 810        }
 811
 812        if (parse_gfp_flags(evsel, sample, gfp_flags) < 0)
 813                return -1;
 814
 815        callsite = find_callsite(evsel, sample);
 816
  817        /*
  818         * Track the page in the live tree so the matching free event can
  819         * recover its gfp flags and migrate type.
  820         */
 821        this.page = page;
 822        pstat = page_stat__findnew_page(&this);
 823        if (pstat == NULL)
 824                return -ENOMEM;
 825
 826        pstat->nr_alloc++;
 827        pstat->alloc_bytes += bytes;
 828        pstat->callsite = callsite;
 829
 830        if (!live_page) {
 831                pstat = page_stat__findnew_alloc(&this);
 832                if (pstat == NULL)
 833                        return -ENOMEM;
 834
 835                pstat->nr_alloc++;
 836                pstat->alloc_bytes += bytes;
 837                pstat->callsite = callsite;
 838        }
 839
 840        this.callsite = callsite;
 841        pstat = page_stat__findnew_caller(&this);
 842        if (pstat == NULL)
 843                return -ENOMEM;
 844
 845        pstat->nr_alloc++;
 846        pstat->alloc_bytes += bytes;
 847
 848        order_stats[order][migrate_type]++;
 849
 850        return 0;
 851}
 852
 853static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 854                                                struct perf_sample *sample)
 855{
 856        u64 page;
 857        unsigned int order = perf_evsel__intval(evsel, sample, "order");
 858        u64 bytes = kmem_page_size << order;
 859        struct page_stat *pstat;
 860        struct page_stat this = {
 861                .order = order,
 862        };
 863
 864        if (use_pfn)
 865                page = perf_evsel__intval(evsel, sample, "pfn");
 866        else
 867                page = perf_evsel__intval(evsel, sample, "page");
 868
 869        nr_page_frees++;
 870        total_page_free_bytes += bytes;
 871
 872        this.page = page;
 873        pstat = page_stat__find_page(&this);
 874        if (pstat == NULL) {
  875                pr_debug2("free of untracked page %"PRIx64" (order: %d)\n",
  876                          page, order);
 877
 878                nr_page_nomatch++;
 879                total_page_nomatch_bytes += bytes;
 880
 881                return 0;
 882        }
 883
 884        this.gfp_flags = pstat->gfp_flags;
 885        this.migrate_type = pstat->migrate_type;
 886        this.callsite = pstat->callsite;
 887
 888        rb_erase(&pstat->node, &page_live_tree);
 889        free(pstat);
 890
 891        if (live_page) {
 892                order_stats[this.order][this.migrate_type]--;
 893        } else {
 894                pstat = page_stat__find_alloc(&this);
 895                if (pstat == NULL)
 896                        return -ENOMEM;
 897
 898                pstat->nr_free++;
 899                pstat->free_bytes += bytes;
 900        }
 901
 902        pstat = page_stat__find_caller(&this);
 903        if (pstat == NULL)
 904                return -ENOENT;
 905
 906        pstat->nr_free++;
 907        pstat->free_bytes += bytes;
 908
 909        if (live_page) {
 910                pstat->nr_alloc--;
 911                pstat->alloc_bytes -= bytes;
 912
 913                if (pstat->nr_alloc == 0) {
 914                        rb_erase(&pstat->node, &page_caller_tree);
 915                        free(pstat);
 916                }
 917        }
 918
 919        return 0;
 920}
 921
 922static bool perf_kmem__skip_sample(struct perf_sample *sample)
 923{
  924        /* skip samples outside the requested time window */
 925        if (perf_time__skip_sample(&ptime, sample->time))
 926                return true;
 927
 928        return false;
 929}
 930
 931typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
 932                                  struct perf_sample *sample);
 933
 934static int process_sample_event(struct perf_tool *tool __maybe_unused,
 935                                union perf_event *event,
 936                                struct perf_sample *sample,
 937                                struct perf_evsel *evsel,
 938                                struct machine *machine)
 939{
 940        int err = 0;
 941        struct thread *thread = machine__findnew_thread(machine, sample->pid,
 942                                                        sample->tid);
 943
 944        if (thread == NULL) {
 945                pr_debug("problem processing %d event, skipping it.\n",
 946                         event->header.type);
 947                return -1;
 948        }
 949
 950        if (perf_kmem__skip_sample(sample))
 951                return 0;
 952
 953        dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
 954
 955        if (evsel->handler != NULL) {
 956                tracepoint_handler f = evsel->handler;
 957                err = f(evsel, sample);
 958        }
 959
 960        thread__put(thread);
 961
 962        return err;
 963}
 964
 965static struct perf_tool perf_kmem = {
 966        .sample          = process_sample_event,
 967        .comm            = perf_event__process_comm,
 968        .mmap            = perf_event__process_mmap,
 969        .mmap2           = perf_event__process_mmap2,
 970        .namespaces      = perf_event__process_namespaces,
 971        .ordered_events  = true,
 972};
 973
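/* Internal fragmentation in percent: share of allocated bytes that were not requested. */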
 974static double fragmentation(unsigned long n_req, unsigned long n_alloc)
 975{
 976        if (n_alloc == 0)
 977                return 0.0;
 978        else
 979                return 100.0 - (100.0 * n_req / n_alloc);
 980}
 981
 982static void __print_slab_result(struct rb_root *root,
 983                                struct perf_session *session,
 984                                int n_lines, int is_caller)
 985{
 986        struct rb_node *next;
 987        struct machine *machine = &session->machines.host;
 988
 989        printf("%.105s\n", graph_dotted_line);
 990        printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
 991        printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
 992        printf("%.105s\n", graph_dotted_line);
 993
 994        next = rb_first(root);
 995
 996        while (next && n_lines--) {
 997                struct alloc_stat *data = rb_entry(next, struct alloc_stat,
 998                                                   node);
 999                struct symbol *sym = NULL;
1000                struct map *map;
1001                char buf[BUFSIZ];
1002                u64 addr;
1003
1004                if (is_caller) {
1005                        addr = data->call_site;
1006                        if (!raw_ip)
1007                                sym = machine__find_kernel_symbol(machine, addr, &map);
1008                } else
1009                        addr = data->ptr;
1010
1011                if (sym != NULL)
1012                        snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
1013                                 addr - map->unmap_ip(map, sym->start));
1014                else
1015                        snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
1016                printf(" %-34s |", buf);
1017
1018                printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n",
1019                       (unsigned long long)data->bytes_alloc,
1020                       (unsigned long)data->bytes_alloc / data->hit,
1021                       (unsigned long long)data->bytes_req,
1022                       (unsigned long)data->bytes_req / data->hit,
1023                       (unsigned long)data->hit,
1024                       (unsigned long)data->pingpong,
1025                       fragmentation(data->bytes_req, data->bytes_alloc));
1026
1027                next = rb_next(next);
1028        }
1029
1030        if (n_lines == -1)
1031                printf(" ...                                | ...             | ...             | ...      | ...       | ...   \n");
1032
1033        printf("%.105s\n", graph_dotted_line);
1034}
1035
1036static const char * const migrate_type_str[] = {
1037        "UNMOVABL",
1038        "RECLAIM",
1039        "MOVABLE",
1040        "RESERVED",
1041        "CMA/ISLT",
1042        "UNKNOWN",
1043};
1044
1045static void __print_page_alloc_result(struct perf_session *session, int n_lines)
1046{
1047        struct rb_node *next = rb_first(&page_alloc_sorted);
1048        struct machine *machine = &session->machines.host;
1049        const char *format;
1050        int gfp_len = max(strlen("GFP flags"), max_gfp_len);
1051
1052        printf("\n%.105s\n", graph_dotted_line);
1053        printf(" %-16s | %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
1054               use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total",
1055               gfp_len, "GFP flags");
1056        printf("%.105s\n", graph_dotted_line);
1057
1058        if (use_pfn)
1059                format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
1060        else
1061                format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
1062
1063        while (next && n_lines--) {
1064                struct page_stat *data;
1065                struct symbol *sym;
1066                struct map *map;
1067                char buf[32];
1068                char *caller = buf;
1069
1070                data = rb_entry(next, struct page_stat, node);
1071                sym = machine__find_kernel_symbol(machine, data->callsite, &map);
1072                if (sym)
1073                        caller = sym->name;
1074                else
1075                        scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
1076
1077                printf(format, (unsigned long long)data->page,
1078                       (unsigned long long)data->alloc_bytes / 1024,
1079                       data->nr_alloc, data->order,
1080                       migrate_type_str[data->migrate_type],
1081                       gfp_len, compact_gfp_string(data->gfp_flags), caller);
1082
1083                next = rb_next(next);
1084        }
1085
1086        if (n_lines == -1) {
1087                printf(" ...              | ...              | ...       | ...   | ...      | %-*s | ...\n",
1088                       gfp_len, "...");
1089        }
1090
1091        printf("%.105s\n", graph_dotted_line);
1092}
1093
1094static void __print_page_caller_result(struct perf_session *session, int n_lines)
1095{
1096        struct rb_node *next = rb_first(&page_caller_sorted);
1097        struct machine *machine = &session->machines.host;
1098        int gfp_len = max(strlen("GFP flags"), max_gfp_len);
1099
1100        printf("\n%.105s\n", graph_dotted_line);
1101        printf(" %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
1102               live_page ? "Live" : "Total", gfp_len, "GFP flags");
1103        printf("%.105s\n", graph_dotted_line);
1104
1105        while (next && n_lines--) {
1106                struct page_stat *data;
1107                struct symbol *sym;
1108                struct map *map;
1109                char buf[32];
1110                char *caller = buf;
1111
1112                data = rb_entry(next, struct page_stat, node);
1113                sym = machine__find_kernel_symbol(machine, data->callsite, &map);
1114                if (sym)
1115                        caller = sym->name;
1116                else
1117                        scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
1118
1119                printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n",
1120                       (unsigned long long)data->alloc_bytes / 1024,
1121                       data->nr_alloc, data->order,
1122                       migrate_type_str[data->migrate_type],
1123                       gfp_len, compact_gfp_string(data->gfp_flags), caller);
1124
1125                next = rb_next(next);
1126        }
1127
1128        if (n_lines == -1) {
1129                printf(" ...              | ...       | ...   | ...      | %-*s | ...\n",
1130                       gfp_len, "...");
1131        }
1132
1133        printf("%.105s\n", graph_dotted_line);
1134}
1135
1136static void print_gfp_flags(void)
1137{
1138        int i;
1139
1140        printf("#\n");
1141        printf("# GFP flags\n");
1142        printf("# ---------\n");
1143        for (i = 0; i < nr_gfps; i++) {
1144                printf("# %08x: %*s: %s\n", gfps[i].flags,
1145                       (int) max_gfp_len, gfps[i].compact_str,
1146                       gfps[i].human_readable);
1147        }
1148}
1149
1150static void print_slab_summary(void)
1151{
1152        printf("\nSUMMARY (SLAB allocator)");
1153        printf("\n========================\n");
1154        printf("Total bytes requested: %'lu\n", total_requested);
1155        printf("Total bytes allocated: %'lu\n", total_allocated);
1156        printf("Total bytes freed:     %'lu\n", total_freed);
1157        if (total_allocated > total_freed) {
1158                printf("Net total bytes allocated: %'lu\n",
1159                total_allocated - total_freed);
1160        }
1161        printf("Total bytes wasted on internal fragmentation: %'lu\n",
1162               total_allocated - total_requested);
1163        printf("Internal fragmentation: %f%%\n",
1164               fragmentation(total_requested, total_allocated));
1165        printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs);
1166}
1167
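/*
 * Frees that never matched an allocation (nr_page_nomatch) are
 * subtracted from the free totals to split requests into alloc+freed,
 * alloc-only and free-only groups.
 */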
1168static void print_page_summary(void)
1169{
1170        int o, m;
1171        u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch;
1172        u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes;
1173
1174        printf("\nSUMMARY (page allocator)");
1175        printf("\n========================\n");
1176        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation requests",
1177               nr_page_allocs, total_page_alloc_bytes / 1024);
1178        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free requests",
1179               nr_page_frees, total_page_free_bytes / 1024);
1180        printf("\n");
1181
1182        printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
1183               nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
1184        printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
1185               nr_page_allocs - nr_alloc_freed,
1186               (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
1187        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free-only requests",
1188               nr_page_nomatch, total_page_nomatch_bytes / 1024);
1189        printf("\n");
1190
1191        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation failures",
1192               nr_page_fails, total_page_fail_bytes / 1024);
1193        printf("\n");
1194
1195        printf("%5s  %12s  %12s  %12s  %12s  %12s\n", "Order",  "Unmovable",
1196               "Reclaimable", "Movable", "Reserved", "CMA/Isolated");
1197        printf("%.5s  %.12s  %.12s  %.12s  %.12s  %.12s\n", graph_dotted_line,
1198               graph_dotted_line, graph_dotted_line, graph_dotted_line,
1199               graph_dotted_line, graph_dotted_line);
1200
1201        for (o = 0; o < MAX_PAGE_ORDER; o++) {
1202                printf("%5d", o);
1203                for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) {
1204                        if (order_stats[o][m])
1205                                printf("  %'12d", order_stats[o][m]);
1206                        else
1207                                printf("  %12c", '.');
1208                }
1209                printf("\n");
1210        }
1211}
1212
1213static void print_slab_result(struct perf_session *session)
1214{
1215        if (caller_flag)
1216                __print_slab_result(&root_caller_sorted, session, caller_lines, 1);
1217        if (alloc_flag)
1218                __print_slab_result(&root_alloc_sorted, session, alloc_lines, 0);
1219        print_slab_summary();
1220}
1221
1222static void print_page_result(struct perf_session *session)
1223{
1224        if (caller_flag || alloc_flag)
1225                print_gfp_flags();
1226        if (caller_flag)
1227                __print_page_caller_result(session, caller_lines);
1228        if (alloc_flag)
1229                __print_page_alloc_result(session, alloc_lines);
1230        print_page_summary();
1231}
1232
1233static void print_result(struct perf_session *session)
1234{
1235        if (kmem_slab)
1236                print_slab_result(session);
1237        if (kmem_page)
1238                print_page_result(session);
1239}
1240
1241static LIST_HEAD(slab_caller_sort);
1242static LIST_HEAD(slab_alloc_sort);
1243static LIST_HEAD(page_caller_sort);
1244static LIST_HEAD(page_alloc_sort);
1245
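/*
 * Re-insert entries into the sorted tree using the chained sort keys; a
 * positive comparison goes to the left subtree, so rb_first() yields the
 * largest entry and results print in descending order.
 * sort_page_insert() below follows the same pattern.
 */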
1246static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
1247                             struct list_head *sort_list)
1248{
1249        struct rb_node **new = &(root->rb_node);
1250        struct rb_node *parent = NULL;
1251        struct sort_dimension *sort;
1252
1253        while (*new) {
1254                struct alloc_stat *this;
1255                int cmp = 0;
1256
1257                this = rb_entry(*new, struct alloc_stat, node);
1258                parent = *new;
1259
1260                list_for_each_entry(sort, sort_list, list) {
1261                        cmp = sort->cmp(data, this);
1262                        if (cmp)
1263                                break;
1264                }
1265
1266                if (cmp > 0)
1267                        new = &((*new)->rb_left);
1268                else
1269                        new = &((*new)->rb_right);
1270        }
1271
1272        rb_link_node(&data->node, parent, new);
1273        rb_insert_color(&data->node, root);
1274}
1275
1276static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted,
1277                               struct list_head *sort_list)
1278{
1279        struct rb_node *node;
1280        struct alloc_stat *data;
1281
1282        for (;;) {
1283                node = rb_first(root);
1284                if (!node)
1285                        break;
1286
1287                rb_erase(node, root);
1288                data = rb_entry(node, struct alloc_stat, node);
1289                sort_slab_insert(root_sorted, data, sort_list);
1290        }
1291}
1292
1293static void sort_page_insert(struct rb_root *root, struct page_stat *data,
1294                             struct list_head *sort_list)
1295{
1296        struct rb_node **new = &root->rb_node;
1297        struct rb_node *parent = NULL;
1298        struct sort_dimension *sort;
1299
1300        while (*new) {
1301                struct page_stat *this;
1302                int cmp = 0;
1303
1304                this = rb_entry(*new, struct page_stat, node);
1305                parent = *new;
1306
1307                list_for_each_entry(sort, sort_list, list) {
1308                        cmp = sort->cmp(data, this);
1309                        if (cmp)
1310                                break;
1311                }
1312
1313                if (cmp > 0)
1314                        new = &parent->rb_left;
1315                else
1316                        new = &parent->rb_right;
1317        }
1318
1319        rb_link_node(&data->node, parent, new);
1320        rb_insert_color(&data->node, root);
1321}
1322
1323static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted,
1324                               struct list_head *sort_list)
1325{
1326        struct rb_node *node;
1327        struct page_stat *data;
1328
1329        for (;;) {
1330                node = rb_first(root);
1331                if (!node)
1332                        break;
1333
1334                rb_erase(node, root);
1335                data = rb_entry(node, struct page_stat, node);
1336                sort_page_insert(root_sorted, data, sort_list);
1337        }
1338}
1339
1340static void sort_result(void)
1341{
1342        if (kmem_slab) {
1343                __sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
1344                                   &slab_alloc_sort);
1345                __sort_slab_result(&root_caller_stat, &root_caller_sorted,
1346                                   &slab_caller_sort);
1347        }
1348        if (kmem_page) {
1349                if (live_page)
1350                        __sort_page_result(&page_live_tree, &page_alloc_sorted,
1351                                           &page_alloc_sort);
1352                else
1353                        __sort_page_result(&page_alloc_tree, &page_alloc_sorted,
1354                                           &page_alloc_sort);
1355
1356                __sort_page_result(&page_caller_tree, &page_caller_sorted,
1357                                   &page_caller_sort);
1358        }
1359}
1360
1361static int __cmd_kmem(struct perf_session *session)
1362{
1363        int err = -EINVAL;
1364        struct perf_evsel *evsel;
1365        const struct perf_evsel_str_handler kmem_tracepoints[] = {
1366                /* slab allocator */
1367                { "kmem:kmalloc",               perf_evsel__process_alloc_event, },
1368                { "kmem:kmem_cache_alloc",      perf_evsel__process_alloc_event, },
1369                { "kmem:kmalloc_node",          perf_evsel__process_alloc_node_event, },
1370                { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
1371                { "kmem:kfree",                 perf_evsel__process_free_event, },
1372                { "kmem:kmem_cache_free",       perf_evsel__process_free_event, },
1373                /* page allocator */
1374                { "kmem:mm_page_alloc",         perf_evsel__process_page_alloc_event, },
1375                { "kmem:mm_page_free",          perf_evsel__process_page_free_event, },
1376        };
1377
1378        if (!perf_session__has_traces(session, "kmem record"))
1379                goto out;
1380
1381        if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
1382                pr_err("Initializing perf session tracepoint handlers failed\n");
1383                goto out;
1384        }
1385
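        /*
         * Use the stable "pfn" field of mm_page_alloc when the running
         * kernel provides it; otherwise fall back to the struct page
         * pointer.
         */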
1386        evlist__for_each_entry(session->evlist, evsel) {
1387                if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") &&
1388                    perf_evsel__field(evsel, "pfn")) {
1389                        use_pfn = true;
1390                        break;
1391                }
1392        }
1393
1394        setup_pager();
1395        err = perf_session__process_events(session);
1396        if (err != 0) {
1397                pr_err("error during process events: %d\n", err);
1398                goto out;
1399        }
1400        sort_result();
1401        print_result(session);
1402out:
1403        return err;
1404}
1405
1406/* slab sort keys */
1407static int ptr_cmp(void *a, void *b)
1408{
1409        struct alloc_stat *l = a;
1410        struct alloc_stat *r = b;
1411
1412        if (l->ptr < r->ptr)
1413                return -1;
1414        else if (l->ptr > r->ptr)
1415                return 1;
1416        return 0;
1417}
1418
1419static struct sort_dimension ptr_sort_dimension = {
1420        .name   = "ptr",
1421        .cmp    = ptr_cmp,
1422};
1423
1424static int slab_callsite_cmp(void *a, void *b)
1425{
1426        struct alloc_stat *l = a;
1427        struct alloc_stat *r = b;
1428
1429        if (l->call_site < r->call_site)
1430                return -1;
1431        else if (l->call_site > r->call_site)
1432                return 1;
1433        return 0;
1434}
1435
1436static struct sort_dimension callsite_sort_dimension = {
1437        .name   = "callsite",
1438        .cmp    = slab_callsite_cmp,
1439};
1440
1441static int hit_cmp(void *a, void *b)
1442{
1443        struct alloc_stat *l = a;
1444        struct alloc_stat *r = b;
1445
1446        if (l->hit < r->hit)
1447                return -1;
1448        else if (l->hit > r->hit)
1449                return 1;
1450        return 0;
1451}
1452
1453static struct sort_dimension hit_sort_dimension = {
1454        .name   = "hit",
1455        .cmp    = hit_cmp,
1456};
1457
1458static int bytes_cmp(void *a, void *b)
1459{
1460        struct alloc_stat *l = a;
1461        struct alloc_stat *r = b;
1462
1463        if (l->bytes_alloc < r->bytes_alloc)
1464                return -1;
1465        else if (l->bytes_alloc > r->bytes_alloc)
1466                return 1;
1467        return 0;
1468}
1469
1470static struct sort_dimension bytes_sort_dimension = {
1471        .name   = "bytes",
1472        .cmp    = bytes_cmp,
1473};
1474
1475static int frag_cmp(void *a, void *b)
1476{
1477        double x, y;
1478        struct alloc_stat *l = a;
1479        struct alloc_stat *r = b;
1480
1481        x = fragmentation(l->bytes_req, l->bytes_alloc);
1482        y = fragmentation(r->bytes_req, r->bytes_alloc);
1483
1484        if (x < y)
1485                return -1;
1486        else if (x > y)
1487                return 1;
1488        return 0;
1489}
1490
1491static struct sort_dimension frag_sort_dimension = {
1492        .name   = "frag",
1493        .cmp    = frag_cmp,
1494};
1495
1496static int pingpong_cmp(void *a, void *b)
1497{
1498        struct alloc_stat *l = a;
1499        struct alloc_stat *r = b;
1500
1501        if (l->pingpong < r->pingpong)
1502                return -1;
1503        else if (l->pingpong > r->pingpong)
1504                return 1;
1505        return 0;
1506}
1507
1508static struct sort_dimension pingpong_sort_dimension = {
1509        .name   = "pingpong",
1510        .cmp    = pingpong_cmp,
1511};
1512
1513/* page sort keys */
1514static int page_cmp(void *a, void *b)
1515{
1516        struct page_stat *l = a;
1517        struct page_stat *r = b;
1518
1519        if (l->page < r->page)
1520                return -1;
1521        else if (l->page > r->page)
1522                return 1;
1523        return 0;
1524}
1525
1526static struct sort_dimension page_sort_dimension = {
1527        .name   = "page",
1528        .cmp    = page_cmp,
1529};
1530
1531static int page_callsite_cmp(void *a, void *b)
1532{
1533        struct page_stat *l = a;
1534        struct page_stat *r = b;
1535
1536        if (l->callsite < r->callsite)
1537                return -1;
1538        else if (l->callsite > r->callsite)
1539                return 1;
1540        return 0;
1541}
1542
1543static struct sort_dimension page_callsite_sort_dimension = {
1544        .name   = "callsite",
1545        .cmp    = page_callsite_cmp,
1546};
1547
1548static int page_hit_cmp(void *a, void *b)
1549{
1550        struct page_stat *l = a;
1551        struct page_stat *r = b;
1552
1553        if (l->nr_alloc < r->nr_alloc)
1554                return -1;
1555        else if (l->nr_alloc > r->nr_alloc)
1556                return 1;
1557        return 0;
1558}
1559
1560static struct sort_dimension page_hit_sort_dimension = {
1561        .name   = "hit",
1562        .cmp    = page_hit_cmp,
1563};
1564
1565static int page_bytes_cmp(void *a, void *b)
1566{
1567        struct page_stat *l = a;
1568        struct page_stat *r = b;
1569
1570        if (l->alloc_bytes < r->alloc_bytes)
1571                return -1;
1572        else if (l->alloc_bytes > r->alloc_bytes)
1573                return 1;
1574        return 0;
1575}
1576
1577static struct sort_dimension page_bytes_sort_dimension = {
1578        .name   = "bytes",
1579        .cmp    = page_bytes_cmp,
1580};
1581
1582static int page_order_cmp(void *a, void *b)
1583{
1584        struct page_stat *l = a;
1585        struct page_stat *r = b;
1586
1587        if (l->order < r->order)
1588                return -1;
1589        else if (l->order > r->order)
1590                return 1;
1591        return 0;
1592}
1593
1594static struct sort_dimension page_order_sort_dimension = {
1595        .name   = "order",
1596        .cmp    = page_order_cmp,
1597};
1598
1599static int migrate_type_cmp(void *a, void *b)
1600{
1601        struct page_stat *l = a;
1602        struct page_stat *r = b;
1603
 1604        /* for internal use to find a freed page */
1605        if (l->migrate_type == -1U)
1606                return 0;
1607
1608        if (l->migrate_type < r->migrate_type)
1609                return -1;
1610        else if (l->migrate_type > r->migrate_type)
1611                return 1;
1612        return 0;
1613}
1614
1615static struct sort_dimension migrate_type_sort_dimension = {
1616        .name   = "migtype",
1617        .cmp    = migrate_type_cmp,
1618};
1619
1620static int gfp_flags_cmp(void *a, void *b)
1621{
1622        struct page_stat *l = a;
1623        struct page_stat *r = b;
1624
 1625        /* for internal use to find a freed page */
1626        if (l->gfp_flags == -1U)
1627                return 0;
1628
1629        if (l->gfp_flags < r->gfp_flags)
1630                return -1;
1631        else if (l->gfp_flags > r->gfp_flags)
1632                return 1;
1633        return 0;
1634}
1635
1636static struct sort_dimension gfp_flags_sort_dimension = {
1637        .name   = "gfp",
1638        .cmp    = gfp_flags_cmp,
1639};

static struct sort_dimension *slab_sorts[] = {
        &ptr_sort_dimension,
        &callsite_sort_dimension,
        &hit_sort_dimension,
        &bytes_sort_dimension,
        &frag_sort_dimension,
        &pingpong_sort_dimension,
};

static struct sort_dimension *page_sorts[] = {
        &page_sort_dimension,
        &page_callsite_sort_dimension,
        &page_hit_sort_dimension,
        &page_bytes_sort_dimension,
        &page_order_sort_dimension,
        &migrate_type_sort_dimension,
        &gfp_flags_sort_dimension,
};
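
/*
 * Dispatch tables for --sort parsing.  Valid tokens by mode:
 *   slab: ptr, callsite, hit, bytes, frag, pingpong
 *   page: page, callsite, hit, bytes, order, migtype, gfp
 * e.g. 'perf kmem stat --page --sort order,hit' (an illustrative
 * invocation built from the tables above).
 */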

static int slab_sort_dimension__add(const char *tok, struct list_head *list)
{
        struct sort_dimension *sort;
        int i;

        for (i = 0; i < (int)ARRAY_SIZE(slab_sorts); i++) {
                if (!strcmp(slab_sorts[i]->name, tok)) {
                        sort = memdup(slab_sorts[i], sizeof(*slab_sorts[i]));
                        if (!sort) {
                                pr_err("%s: memdup failed\n", __func__);
                                return -1;
                        }
                        list_add_tail(&sort->list, list);
                        return 0;
                }
        }

        return -1;
}

static int page_sort_dimension__add(const char *tok, struct list_head *list)
{
        struct sort_dimension *sort;
        int i;

        for (i = 0; i < (int)ARRAY_SIZE(page_sorts); i++) {
                if (!strcmp(page_sorts[i]->name, tok)) {
                        sort = memdup(page_sorts[i], sizeof(*page_sorts[i]));
                        if (!sort) {
                                pr_err("%s: memdup failed\n", __func__);
                                return -1;
                        }
                        list_add_tail(&sort->list, list);
                        return 0;
                }
        }

        return -1;
}
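
/*
 * Both helpers memdup() the matched template instead of linking it
 * directly, so one dimension can appear on the caller and the alloc
 * sort lists at the same time, each copy carrying its own list node.
 */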

static int setup_slab_sorting(struct list_head *sort_list, const char *arg)
{
        char *tok;
        char *str = strdup(arg);
        char *pos = str;

        if (!str) {
                pr_err("%s: strdup failed\n", __func__);
                return -1;
        }

        while (true) {
                tok = strsep(&pos, ",");
                if (!tok)
                        break;
                if (slab_sort_dimension__add(tok, sort_list) < 0) {
                        pr_err("Unknown slab --sort key: '%s'\n", tok);
                        free(str);
                        return -1;
                }
        }

        free(str);
        return 0;
}

static int setup_page_sorting(struct list_head *sort_list, const char *arg)
{
        char *tok;
        char *str = strdup(arg);
        char *pos = str;

        if (!str) {
                pr_err("%s: strdup failed\n", __func__);
                return -1;
        }

        while (true) {
                tok = strsep(&pos, ",");
                if (!tok)
                        break;
                if (page_sort_dimension__add(tok, sort_list) < 0) {
                        pr_err("Unknown page --sort key: '%s'\n", tok);
                        free(str);
                        return -1;
                }
        }

        free(str);
        return 0;
}
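
/*
 * strsep() consumes the buffer it walks, which is why both setup
 * helpers parse a strdup()'ed copy of the key string and free it on
 * every exit path.
 */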

static int parse_sort_opt(const struct option *opt __maybe_unused,
                          const char *arg, int unset __maybe_unused)
{
        if (!arg)
                return -1;

        if (kmem_page > kmem_slab ||
            (kmem_page == 0 && kmem_slab == 0 && kmem_default == KMEM_PAGE)) {
                if (caller_flag > alloc_flag)
                        return setup_page_sorting(&page_caller_sort, arg);
                else
                        return setup_page_sorting(&page_alloc_sort, arg);
        } else {
                if (caller_flag > alloc_flag)
                        return setup_slab_sorting(&slab_caller_sort, arg);
                else
                        return setup_slab_sorting(&slab_alloc_sort, arg);
        }
}
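
/*
 * Because of the relative flag comparisons above, --sort binds to
 * whichever mode (--slab/--page) and view (--caller/--alloc) appeared
 * most recently on the command line, so it should be given after them.
 */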

static int parse_caller_opt(const struct option *opt __maybe_unused,
                            const char *arg __maybe_unused,
                            int unset __maybe_unused)
{
        caller_flag = (alloc_flag + 1);
        return 0;
}

static int parse_alloc_opt(const struct option *opt __maybe_unused,
                           const char *arg __maybe_unused,
                           int unset __maybe_unused)
{
        alloc_flag = (caller_flag + 1);
        return 0;
}

static int parse_slab_opt(const struct option *opt __maybe_unused,
                          const char *arg __maybe_unused,
                          int unset __maybe_unused)
{
        kmem_slab = (kmem_page + 1);
        return 0;
}

static int parse_page_opt(const struct option *opt __maybe_unused,
                          const char *arg __maybe_unused,
                          int unset __maybe_unused)
{
        kmem_page = (kmem_slab + 1);
        return 0;
}
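
/*
 * Each of the four callbacks above sets its flag to the rival flag
 * plus one, so the flags double as ordering marks: the larger value
 * always belongs to the option seen last, which is exactly what the
 * '>' comparisons in parse_sort_opt() and parse_line_opt() test.
 */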

static int parse_line_opt(const struct option *opt __maybe_unused,
                          const char *arg, int unset __maybe_unused)
{
        int lines;

        if (!arg)
                return -1;

        lines = strtoul(arg, NULL, 10);

        if (caller_flag > alloc_flag)
                caller_lines = lines;
        else
                alloc_lines = lines;

        return 0;
}
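
/* Like --sort, --line applies to whichever of --caller/--alloc came last. */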

static int __cmd_record(int argc, const char **argv)
{
        const char * const record_args[] = {
        "record", "-a", "-R", "-c", "1",
        };
        const char * const slab_events[] = {
        "-e", "kmem:kmalloc",
        "-e", "kmem:kmalloc_node",
        "-e", "kmem:kfree",
        "-e", "kmem:kmem_cache_alloc",
        "-e", "kmem:kmem_cache_alloc_node",
        "-e", "kmem:kmem_cache_free",
        };
        const char * const page_events[] = {
        "-e", "kmem:mm_page_alloc",
        "-e", "kmem:mm_page_free",
        };
        unsigned int rec_argc, i, j;
        const char **rec_argv;

        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
        if (kmem_slab)
                rec_argc += ARRAY_SIZE(slab_events);
        if (kmem_page)
                rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */

        rec_argv = calloc(rec_argc + 1, sizeof(char *));

        if (rec_argv == NULL)
                return -ENOMEM;

        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = strdup(record_args[i]);

        if (kmem_slab) {
                for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++)
                        rec_argv[i] = strdup(slab_events[j]);
        }
        if (kmem_page) {
                rec_argv[i++] = strdup("-g");

                for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
                        rec_argv[i] = strdup(page_events[j]);
        }

        for (j = 1; j < (unsigned int)argc; j++, i++)
                rec_argv[i] = argv[j];

        return cmd_record(i, rec_argv);
}
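
/*
 * In the default slab mode the synthesized command line is effectively
 * (user arguments appended at the end):
 *
 *   perf record -a -R -c 1 \
 *        -e kmem:kmalloc -e kmem:kmalloc_node -e kmem:kfree \
 *        -e kmem:kmem_cache_alloc -e kmem:kmem_cache_alloc_node \
 *        -e kmem:kmem_cache_free
 *
 * With --page, -g plus the mm_page_alloc/mm_page_free events are used
 * instead so that callchains are available for callsite resolution.
 */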

static int kmem_config(const char *var, const char *value, void *cb __maybe_unused)
{
        if (!strcmp(var, "kmem.default")) {
                if (!strcmp(value, "slab"))
                        kmem_default = KMEM_SLAB;
                else if (!strcmp(value, "page"))
                        kmem_default = KMEM_PAGE;
                else
                        pr_err("invalid default value ('slab' or 'page' required): %s\n",
                               value);
                return 0;
        }

        return 0;
}
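
/*
 * The default analysis mode can thus be flipped without command-line
 * flags; a sample ~/.perfconfig stanza (illustrative):
 *
 *   [kmem]
 *           default = page
 */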

int cmd_kmem(int argc, const char **argv)
{
        const char * const default_slab_sort = "frag,hit,bytes";
        const char * const default_page_sort = "bytes,hit";
        struct perf_data data = {
                .mode = PERF_DATA_MODE_READ,
        };
        const struct option kmem_options[] = {
        OPT_STRING('i', "input", &input_name, "file", "input file name"),
        OPT_INCR('v', "verbose", &verbose,
                    "be more verbose (show symbol address, etc)"),
        OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
                           "show per-callsite statistics", parse_caller_opt),
        OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
                           "show per-allocation statistics", parse_alloc_opt),
        OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
                     "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, "
                     "page, order, migtype, gfp", parse_sort_opt),
        OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
        OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
        OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
        OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator",
                           parse_slab_opt),
        OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
                           parse_page_opt),
        OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"),
        OPT_STRING(0, "time", &time_str, "str",
                   "Time span of interest (start,stop)"),
        OPT_END()
        };
        const char *const kmem_subcommands[] = { "record", "stat", NULL };
        const char *kmem_usage[] = {
                NULL,
                NULL
        };
        struct perf_session *session;
        const char errmsg[] = "No %s allocation events found.  Have you run 'perf kmem record --%s'?\n";
        int ret = perf_config(kmem_config, NULL);

        if (ret)
                return ret;

        argc = parse_options_subcommand(argc, argv, kmem_options,
                                        kmem_subcommands, kmem_usage, 0);

        if (!argc)
                usage_with_options(kmem_usage, kmem_options);

        if (kmem_slab == 0 && kmem_page == 0) {
                if (kmem_default == KMEM_SLAB)
                        kmem_slab = 1;
                else
                        kmem_page = 1;
        }

        if (!strncmp(argv[0], "rec", 3)) {
                symbol__init(NULL);
                return __cmd_record(argc, argv);
        }

        data.file.path = input_name;

        kmem_session = session = perf_session__new(&data, false, &perf_kmem);
        if (session == NULL)
                return -1;

        ret = -1;

        if (kmem_slab) {
                if (!perf_evlist__find_tracepoint_by_name(session->evlist,
                                                          "kmem:kmalloc")) {
                        pr_err(errmsg, "slab", "slab");
                        goto out_delete;
                }
        }

        if (kmem_page) {
                struct perf_evsel *evsel;

                evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
                                                             "kmem:mm_page_alloc");
                if (evsel == NULL) {
                        pr_err(errmsg, "page", "page");
                        goto out_delete;
                }

                kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent);
                symbol_conf.use_callchain = true;
        }
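
        /*
         * The page allocation events carry no callsite field of their
         * own; call sites are recovered from the callchains recorded
         * with -g, which is why use_callchain is forced on above.
         */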

        symbol__init(&session->header.env);

        if (perf_time__parse_str(&ptime, time_str) != 0) {
                pr_err("Invalid time string\n");
                ret = -EINVAL;
                goto out_delete;
        }

        if (!strcmp(argv[0], "stat")) {
                setlocale(LC_ALL, "");

                if (cpu__setup_cpunode_map())
                        goto out_delete;

                if (list_empty(&slab_caller_sort))
                        setup_slab_sorting(&slab_caller_sort, default_slab_sort);
                if (list_empty(&slab_alloc_sort))
                        setup_slab_sorting(&slab_alloc_sort, default_slab_sort);
                if (list_empty(&page_caller_sort))
                        setup_page_sorting(&page_caller_sort, default_page_sort);
                if (list_empty(&page_alloc_sort))
                        setup_page_sorting(&page_alloc_sort, default_page_sort);

                if (kmem_page) {
                        setup_page_sorting(&page_alloc_sort_input,
                                           "page,order,migtype,gfp");
                        setup_page_sorting(&page_caller_sort_input,
                                           "callsite,order,migtype,gfp");
                }
                ret = __cmd_kmem(session);
        } else
                usage_with_options(kmem_usage, kmem_options);

out_delete:
        perf_session__delete(session);

        return ret;
}
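
/*
 * Typical workflow (illustrative invocations):
 *
 *   # perf kmem record --page -- sleep 10
 *   # perf kmem stat --page --caller --sort hit,bytes --line 20
 */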