linux/tools/perf/builtin-kmem.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2#include "builtin.h"
   3#include "perf.h"
   4
   5#include "util/dso.h"
   6#include "util/evlist.h"
   7#include "util/evsel.h"
   8#include "util/config.h"
   9#include "util/map.h"
  10#include "util/symbol.h"
  11#include "util/thread.h"
  12#include "util/header.h"
  13#include "util/session.h"
  14#include "util/tool.h"
  15#include "util/callchain.h"
  16#include "util/time-utils.h"
  17#include <linux/err.h>
  18
  19#include <subcmd/pager.h>
  20#include <subcmd/parse-options.h>
  21#include "util/trace-event.h"
  22#include "util/data.h"
  23#include "util/cpumap.h"
  24
  25#include "util/debug.h"
  26#include "util/string2.h"
  27
  28#include <linux/kernel.h>
  29#include <linux/rbtree.h>
  30#include <linux/string.h>
  31#include <linux/zalloc.h>
  32#include <errno.h>
  33#include <inttypes.h>
  34#include <locale.h>
  35#include <regex.h>
  36
  37#include <linux/ctype.h>
  38
  39static int      kmem_slab;
  40static int      kmem_page;
  41
  42static long     kmem_page_size;
  43static enum {
  44        KMEM_SLAB,
  45        KMEM_PAGE,
  46} kmem_default = KMEM_SLAB;  /* for backward compatibility */
  47
  48struct alloc_stat;
  49typedef int (*sort_fn_t)(void *, void *);
  50
  51static int                      alloc_flag;
  52static int                      caller_flag;
  53
  54static int                      alloc_lines = -1;
  55static int                      caller_lines = -1;
  56
  57static bool                     raw_ip;
  58
  59struct alloc_stat {
  60        u64     call_site;
  61        u64     ptr;
  62        u64     bytes_req;
  63        u64     bytes_alloc;
  64        u64     last_alloc;
  65        u32     hit;
  66        u32     pingpong;
  67
  68        short   alloc_cpu;
  69
  70        struct rb_node node;
  71};
  72
  73static struct rb_root root_alloc_stat;
  74static struct rb_root root_alloc_sorted;
  75static struct rb_root root_caller_stat;
  76static struct rb_root root_caller_sorted;
  77
  78static unsigned long total_requested, total_allocated, total_freed;
  79static unsigned long nr_allocs, nr_cross_allocs;
  80
  81/* filters for controlling start and stop of time of analysis */
  82static struct perf_time_interval ptime;
  83const char *time_str;
  84
  85static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
  86                             int bytes_req, int bytes_alloc, int cpu)
  87{
  88        struct rb_node **node = &root_alloc_stat.rb_node;
  89        struct rb_node *parent = NULL;
  90        struct alloc_stat *data = NULL;
  91
  92        while (*node) {
  93                parent = *node;
  94                data = rb_entry(*node, struct alloc_stat, node);
  95
  96                if (ptr > data->ptr)
  97                        node = &(*node)->rb_right;
  98                else if (ptr < data->ptr)
  99                        node = &(*node)->rb_left;
 100                else
 101                        break;
 102        }
 103
 104        if (data && data->ptr == ptr) {
 105                data->hit++;
 106                data->bytes_req += bytes_req;
 107                data->bytes_alloc += bytes_alloc;
 108        } else {
 109                data = malloc(sizeof(*data));
 110                if (!data) {
 111                        pr_err("%s: malloc failed\n", __func__);
 112                        return -1;
 113                }
 114                data->ptr = ptr;
 115                data->pingpong = 0;
 116                data->hit = 1;
 117                data->bytes_req = bytes_req;
 118                data->bytes_alloc = bytes_alloc;
 119
 120                rb_link_node(&data->node, parent, node);
 121                rb_insert_color(&data->node, &root_alloc_stat);
 122        }
 123        data->call_site = call_site;
 124        data->alloc_cpu = cpu;
 125        data->last_alloc = bytes_alloc;
 126
 127        return 0;
 128}
 129
 130static int insert_caller_stat(unsigned long call_site,
 131                              int bytes_req, int bytes_alloc)
 132{
 133        struct rb_node **node = &root_caller_stat.rb_node;
 134        struct rb_node *parent = NULL;
 135        struct alloc_stat *data = NULL;
 136
 137        while (*node) {
 138                parent = *node;
 139                data = rb_entry(*node, struct alloc_stat, node);
 140
 141                if (call_site > data->call_site)
 142                        node = &(*node)->rb_right;
 143                else if (call_site < data->call_site)
 144                        node = &(*node)->rb_left;
 145                else
 146                        break;
 147        }
 148
 149        if (data && data->call_site == call_site) {
 150                data->hit++;
 151                data->bytes_req += bytes_req;
 152                data->bytes_alloc += bytes_alloc;
 153        } else {
 154                data = malloc(sizeof(*data));
 155                if (!data) {
 156                        pr_err("%s: malloc failed\n", __func__);
 157                        return -1;
 158                }
 159                data->call_site = call_site;
 160                data->pingpong = 0;
 161                data->hit = 1;
 162                data->bytes_req = bytes_req;
 163                data->bytes_alloc = bytes_alloc;
 164
 165                rb_link_node(&data->node, parent, node);
 166                rb_insert_color(&data->node, &root_caller_stat);
 167        }
 168
 169        return 0;
 170}
 171
 172static int evsel__process_alloc_event(struct evsel *evsel, struct perf_sample *sample)
 173{
 174        unsigned long ptr = evsel__intval(evsel, sample, "ptr"),
 175                      call_site = evsel__intval(evsel, sample, "call_site");
 176        int bytes_req = evsel__intval(evsel, sample, "bytes_req"),
 177            bytes_alloc = evsel__intval(evsel, sample, "bytes_alloc");
 178
 179        if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
 180            insert_caller_stat(call_site, bytes_req, bytes_alloc))
 181                return -1;
 182
 183        total_requested += bytes_req;
 184        total_allocated += bytes_alloc;
 185
 186        nr_allocs++;
 187        return 0;
 188}
 189
 190static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_sample *sample)
 191{
 192        int ret = evsel__process_alloc_event(evsel, sample);
 193
 194        if (!ret) {
 195                int node1 = cpu__get_node(sample->cpu),
 196                    node2 = evsel__intval(evsel, sample, "node");
 197
 198                if (node1 != node2)
 199                        nr_cross_allocs++;
 200        }
 201
 202        return ret;
 203}
 204
 205static int ptr_cmp(void *, void *);
 206static int slab_callsite_cmp(void *, void *);
 207
 208static struct alloc_stat *search_alloc_stat(unsigned long ptr,
 209                                            unsigned long call_site,
 210                                            struct rb_root *root,
 211                                            sort_fn_t sort_fn)
 212{
 213        struct rb_node *node = root->rb_node;
 214        struct alloc_stat key = { .ptr = ptr, .call_site = call_site };
 215
 216        while (node) {
 217                struct alloc_stat *data;
 218                int cmp;
 219
 220                data = rb_entry(node, struct alloc_stat, node);
 221
 222                cmp = sort_fn(&key, data);
 223                if (cmp < 0)
 224                        node = node->rb_left;
 225                else if (cmp > 0)
 226                        node = node->rb_right;
 227                else
 228                        return data;
 229        }
 230        return NULL;
 231}
 232
 233static int evsel__process_free_event(struct evsel *evsel, struct perf_sample *sample)
 234{
 235        unsigned long ptr = evsel__intval(evsel, sample, "ptr");
 236        struct alloc_stat *s_alloc, *s_caller;
 237
 238        s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
 239        if (!s_alloc)
 240                return 0;
 241
 242        total_freed += s_alloc->last_alloc;
 243
 244        if ((short)sample->cpu != s_alloc->alloc_cpu) {
 245                s_alloc->pingpong++;
 246
 247                s_caller = search_alloc_stat(0, s_alloc->call_site,
 248                                             &root_caller_stat,
 249                                             slab_callsite_cmp);
 250                if (!s_caller)
 251                        return -1;
 252                s_caller->pingpong++;
 253        }
 254        s_alloc->alloc_cpu = -1;
 255
 256        return 0;
 257}
 258
 259static u64 total_page_alloc_bytes;
 260static u64 total_page_free_bytes;
 261static u64 total_page_nomatch_bytes;
 262static u64 total_page_fail_bytes;
 263static unsigned long nr_page_allocs;
 264static unsigned long nr_page_frees;
 265static unsigned long nr_page_fails;
 266static unsigned long nr_page_nomatch;
 267
 268static bool use_pfn;
 269static bool live_page;
 270static struct perf_session *kmem_session;
 271
 272#define MAX_MIGRATE_TYPES  6
 273#define MAX_PAGE_ORDER     11
 274
 275static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];
 276
 277struct page_stat {
 278        struct rb_node  node;
 279        u64             page;
 280        u64             callsite;
 281        int             order;
 282        unsigned        gfp_flags;
 283        unsigned        migrate_type;
 284        u64             alloc_bytes;
 285        u64             free_bytes;
 286        int             nr_alloc;
 287        int             nr_free;
 288};
 289
 290static struct rb_root page_live_tree;
 291static struct rb_root page_alloc_tree;
 292static struct rb_root page_alloc_sorted;
 293static struct rb_root page_caller_tree;
 294static struct rb_root page_caller_sorted;
 295
 296struct alloc_func {
 297        u64 start;
 298        u64 end;
 299        char *name;
 300};
 301
 302static int nr_alloc_funcs;
 303static struct alloc_func *alloc_func_list;
 304
 305static int funcmp(const void *a, const void *b)
 306{
 307        const struct alloc_func *fa = a;
 308        const struct alloc_func *fb = b;
 309
 310        if (fa->start > fb->start)
 311                return 1;
 312        else
 313                return -1;
 314}
 315
 316static int callcmp(const void *a, const void *b)
 317{
 318        const struct alloc_func *fa = a;
 319        const struct alloc_func *fb = b;
 320
 321        if (fb->start <= fa->start && fa->end < fb->end)
 322                return 0;
 323
 324        if (fa->start > fb->start)
 325                return 1;
 326        else
 327                return -1;
 328}
 329
 330static int build_alloc_func_list(void)
 331{
 332        int ret;
 333        struct map *kernel_map;
 334        struct symbol *sym;
 335        struct rb_node *node;
 336        struct alloc_func *func;
 337        struct machine *machine = &kmem_session->machines.host;
 338        regex_t alloc_func_regex;
 339        static const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?";
 340
 341        ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED);
 342        if (ret) {
 343                char err[BUFSIZ];
 344
 345                regerror(ret, &alloc_func_regex, err, sizeof(err));
 346                pr_err("Invalid regex: %s\n%s", pattern, err);
 347                return -EINVAL;
 348        }
 349
 350        kernel_map = machine__kernel_map(machine);
 351        if (map__load(kernel_map) < 0) {
 352                pr_err("cannot load kernel map\n");
 353                return -ENOENT;
 354        }
 355
 356        map__for_each_symbol(kernel_map, sym, node) {
 357                if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0))
 358                        continue;
 359
 360                func = realloc(alloc_func_list,
 361                               (nr_alloc_funcs + 1) * sizeof(*func));
 362                if (func == NULL)
 363                        return -ENOMEM;
 364
 365                pr_debug("alloc func: %s\n", sym->name);
 366                func[nr_alloc_funcs].start = sym->start;
 367                func[nr_alloc_funcs].end   = sym->end;
 368                func[nr_alloc_funcs].name  = sym->name;
 369
 370                alloc_func_list = func;
 371                nr_alloc_funcs++;
 372        }
 373
 374        qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp);
 375
 376        regfree(&alloc_func_regex);
 377        return 0;
 378}
 379
 380/*
 381 * Find first non-memory allocation function from callchain.
 382 * The allocation functions are in the 'alloc_func_list'.
 383 */
 384static u64 find_callsite(struct evsel *evsel, struct perf_sample *sample)
 385{
 386        struct addr_location al;
 387        struct machine *machine = &kmem_session->machines.host;
 388        struct callchain_cursor_node *node;
 389
 390        if (alloc_func_list == NULL) {
 391                if (build_alloc_func_list() < 0)
 392                        goto out;
 393        }
 394
 395        al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
 396        sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);
 397
 398        callchain_cursor_commit(&callchain_cursor);
 399        while (true) {
 400                struct alloc_func key, *caller;
 401                u64 addr;
 402
 403                node = callchain_cursor_current(&callchain_cursor);
 404                if (node == NULL)
 405                        break;
 406
 407                key.start = key.end = node->ip;
 408                caller = bsearch(&key, alloc_func_list, nr_alloc_funcs,
 409                                 sizeof(key), callcmp);
 410                if (!caller) {
 411                        /* found */
 412                        if (node->ms.map)
 413                                addr = map__unmap_ip(node->ms.map, node->ip);
 414                        else
 415                                addr = node->ip;
 416
 417                        return addr;
 418                } else
 419                        pr_debug3("skipping alloc function: %s\n", caller->name);
 420
 421                callchain_cursor_advance(&callchain_cursor);
 422        }
 423
 424out:
 425        pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip);
 426        return sample->ip;
 427}
 428
 429struct sort_dimension {
 430        const char              name[20];
 431        sort_fn_t               cmp;
 432        struct list_head        list;
 433};
 434
 435static LIST_HEAD(page_alloc_sort_input);
 436static LIST_HEAD(page_caller_sort_input);
 437
 438static struct page_stat *
 439__page_stat__findnew_page(struct page_stat *pstat, bool create)
 440{
 441        struct rb_node **node = &page_live_tree.rb_node;
 442        struct rb_node *parent = NULL;
 443        struct page_stat *data;
 444
 445        while (*node) {
 446                s64 cmp;
 447
 448                parent = *node;
 449                data = rb_entry(*node, struct page_stat, node);
 450
 451                cmp = data->page - pstat->page;
 452                if (cmp < 0)
 453                        node = &parent->rb_left;
 454                else if (cmp > 0)
 455                        node = &parent->rb_right;
 456                else
 457                        return data;
 458        }
 459
 460        if (!create)
 461                return NULL;
 462
 463        data = zalloc(sizeof(*data));
 464        if (data != NULL) {
 465                data->page = pstat->page;
 466                data->order = pstat->order;
 467                data->gfp_flags = pstat->gfp_flags;
 468                data->migrate_type = pstat->migrate_type;
 469
 470                rb_link_node(&data->node, parent, node);
 471                rb_insert_color(&data->node, &page_live_tree);
 472        }
 473
 474        return data;
 475}
 476
 477static struct page_stat *page_stat__find_page(struct page_stat *pstat)
 478{
 479        return __page_stat__findnew_page(pstat, false);
 480}
 481
 482static struct page_stat *page_stat__findnew_page(struct page_stat *pstat)
 483{
 484        return __page_stat__findnew_page(pstat, true);
 485}
 486
 487static struct page_stat *
 488__page_stat__findnew_alloc(struct page_stat *pstat, bool create)
 489{
 490        struct rb_node **node = &page_alloc_tree.rb_node;
 491        struct rb_node *parent = NULL;
 492        struct page_stat *data;
 493        struct sort_dimension *sort;
 494
 495        while (*node) {
 496                int cmp = 0;
 497
 498                parent = *node;
 499                data = rb_entry(*node, struct page_stat, node);
 500
 501                list_for_each_entry(sort, &page_alloc_sort_input, list) {
 502                        cmp = sort->cmp(pstat, data);
 503                        if (cmp)
 504                                break;
 505                }
 506
 507                if (cmp < 0)
 508                        node = &parent->rb_left;
 509                else if (cmp > 0)
 510                        node = &parent->rb_right;
 511                else
 512                        return data;
 513        }
 514
 515        if (!create)
 516                return NULL;
 517
 518        data = zalloc(sizeof(*data));
 519        if (data != NULL) {
 520                data->page = pstat->page;
 521                data->order = pstat->order;
 522                data->gfp_flags = pstat->gfp_flags;
 523                data->migrate_type = pstat->migrate_type;
 524
 525                rb_link_node(&data->node, parent, node);
 526                rb_insert_color(&data->node, &page_alloc_tree);
 527        }
 528
 529        return data;
 530}
 531
 532static struct page_stat *page_stat__find_alloc(struct page_stat *pstat)
 533{
 534        return __page_stat__findnew_alloc(pstat, false);
 535}
 536
 537static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat)
 538{
 539        return __page_stat__findnew_alloc(pstat, true);
 540}
 541
 542static struct page_stat *
 543__page_stat__findnew_caller(struct page_stat *pstat, bool create)
 544{
 545        struct rb_node **node = &page_caller_tree.rb_node;
 546        struct rb_node *parent = NULL;
 547        struct page_stat *data;
 548        struct sort_dimension *sort;
 549
 550        while (*node) {
 551                int cmp = 0;
 552
 553                parent = *node;
 554                data = rb_entry(*node, struct page_stat, node);
 555
 556                list_for_each_entry(sort, &page_caller_sort_input, list) {
 557                        cmp = sort->cmp(pstat, data);
 558                        if (cmp)
 559                                break;
 560                }
 561
 562                if (cmp < 0)
 563                        node = &parent->rb_left;
 564                else if (cmp > 0)
 565                        node = &parent->rb_right;
 566                else
 567                        return data;
 568        }
 569
 570        if (!create)
 571                return NULL;
 572
 573        data = zalloc(sizeof(*data));
 574        if (data != NULL) {
 575                data->callsite = pstat->callsite;
 576                data->order = pstat->order;
 577                data->gfp_flags = pstat->gfp_flags;
 578                data->migrate_type = pstat->migrate_type;
 579
 580                rb_link_node(&data->node, parent, node);
 581                rb_insert_color(&data->node, &page_caller_tree);
 582        }
 583
 584        return data;
 585}
 586
 587static struct page_stat *page_stat__find_caller(struct page_stat *pstat)
 588{
 589        return __page_stat__findnew_caller(pstat, false);
 590}
 591
 592static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat)
 593{
 594        return __page_stat__findnew_caller(pstat, true);
 595}
 596
 597static bool valid_page(u64 pfn_or_page)
 598{
 599        if (use_pfn && pfn_or_page == -1UL)
 600                return false;
 601        if (!use_pfn && pfn_or_page == 0)
 602                return false;
 603        return true;
 604}
 605
 606struct gfp_flag {
 607        unsigned int flags;
 608        char *compact_str;
 609        char *human_readable;
 610};
 611
 612static struct gfp_flag *gfps;
 613static int nr_gfps;
 614
 615static int gfpcmp(const void *a, const void *b)
 616{
 617        const struct gfp_flag *fa = a;
 618        const struct gfp_flag *fb = b;
 619
 620        return fa->flags - fb->flags;
 621}
 622
 623/* see include/trace/events/mmflags.h */
 624static const struct {
 625        const char *original;
 626        const char *compact;
 627} gfp_compact_table[] = {
 628        { "GFP_TRANSHUGE",              "THP" },
 629        { "GFP_TRANSHUGE_LIGHT",        "THL" },
 630        { "GFP_HIGHUSER_MOVABLE",       "HUM" },
 631        { "GFP_HIGHUSER",               "HU" },
 632        { "GFP_USER",                   "U" },
 633        { "GFP_KERNEL_ACCOUNT",         "KAC" },
 634        { "GFP_KERNEL",                 "K" },
 635        { "GFP_NOFS",                   "NF" },
 636        { "GFP_ATOMIC",                 "A" },
 637        { "GFP_NOIO",                   "NI" },
 638        { "GFP_NOWAIT",                 "NW" },
 639        { "GFP_DMA",                    "D" },
 640        { "__GFP_HIGHMEM",              "HM" },
 641        { "GFP_DMA32",                  "D32" },
 642        { "__GFP_HIGH",                 "H" },
 643        { "__GFP_ATOMIC",               "_A" },
 644        { "__GFP_IO",                   "I" },
 645        { "__GFP_FS",                   "F" },
 646        { "__GFP_NOWARN",               "NWR" },
 647        { "__GFP_RETRY_MAYFAIL",        "R" },
 648        { "__GFP_NOFAIL",               "NF" },
 649        { "__GFP_NORETRY",              "NR" },
 650        { "__GFP_COMP",                 "C" },
 651        { "__GFP_ZERO",                 "Z" },
 652        { "__GFP_NOMEMALLOC",           "NMA" },
 653        { "__GFP_MEMALLOC",             "MA" },
 654        { "__GFP_HARDWALL",             "HW" },
 655        { "__GFP_THISNODE",             "TN" },
 656        { "__GFP_RECLAIMABLE",          "RC" },
 657        { "__GFP_MOVABLE",              "M" },
 658        { "__GFP_ACCOUNT",              "AC" },
 659        { "__GFP_WRITE",                "WR" },
 660        { "__GFP_RECLAIM",              "R" },
 661        { "__GFP_DIRECT_RECLAIM",       "DR" },
 662        { "__GFP_KSWAPD_RECLAIM",       "KR" },
 663};
 664
 665static size_t max_gfp_len;
 666
 667static char *compact_gfp_flags(char *gfp_flags)
 668{
 669        char *orig_flags = strdup(gfp_flags);
 670        char *new_flags = NULL;
 671        char *str, *pos = NULL;
 672        size_t len = 0;
 673
 674        if (orig_flags == NULL)
 675                return NULL;
 676
 677        str = strtok_r(orig_flags, "|", &pos);
 678        while (str) {
 679                size_t i;
 680                char *new;
 681                const char *cpt;
 682
 683                for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) {
 684                        if (strcmp(gfp_compact_table[i].original, str))
 685                                continue;
 686
 687                        cpt = gfp_compact_table[i].compact;
 688                        new = realloc(new_flags, len + strlen(cpt) + 2);
 689                        if (new == NULL) {
 690                                free(new_flags);
 691                                free(orig_flags);
 692                                return NULL;
 693                        }
 694
 695                        new_flags = new;
 696
 697                        if (!len) {
 698                                strcpy(new_flags, cpt);
 699                        } else {
 700                                strcat(new_flags, "|");
 701                                strcat(new_flags, cpt);
 702                                len++;
 703                        }
 704
 705                        len += strlen(cpt);
 706                }
 707
 708                str = strtok_r(NULL, "|", &pos);
 709        }
 710
 711        if (max_gfp_len < len)
 712                max_gfp_len = len;
 713
 714        free(orig_flags);
 715        return new_flags;
 716}
 717
 718static char *compact_gfp_string(unsigned long gfp_flags)
 719{
 720        struct gfp_flag key = {
 721                .flags = gfp_flags,
 722        };
 723        struct gfp_flag *gfp;
 724
 725        gfp = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp);
 726        if (gfp)
 727                return gfp->compact_str;
 728
 729        return NULL;
 730}
 731
 732static int parse_gfp_flags(struct evsel *evsel, struct perf_sample *sample,
 733                           unsigned int gfp_flags)
 734{
 735        struct tep_record record = {
 736                .cpu = sample->cpu,
 737                .data = sample->raw_data,
 738                .size = sample->raw_size,
 739        };
 740        struct trace_seq seq;
 741        char *str, *pos = NULL;
 742
 743        if (nr_gfps) {
 744                struct gfp_flag key = {
 745                        .flags = gfp_flags,
 746                };
 747
 748                if (bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp))
 749                        return 0;
 750        }
 751
 752        trace_seq_init(&seq);
 753        tep_print_event(evsel->tp_format->tep,
 754                        &seq, &record, "%s", TEP_PRINT_INFO);
 755
 756        str = strtok_r(seq.buffer, " ", &pos);
 757        while (str) {
 758                if (!strncmp(str, "gfp_flags=", 10)) {
 759                        struct gfp_flag *new;
 760
 761                        new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps));
 762                        if (new == NULL)
 763                                return -ENOMEM;
 764
 765                        gfps = new;
 766                        new += nr_gfps++;
 767
 768                        new->flags = gfp_flags;
 769                        new->human_readable = strdup(str + 10);
 770                        new->compact_str = compact_gfp_flags(str + 10);
 771                        if (!new->human_readable || !new->compact_str)
 772                                return -ENOMEM;
 773
 774                        qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp);
 775                }
 776
 777                str = strtok_r(NULL, " ", &pos);
 778        }
 779
 780        trace_seq_destroy(&seq);
 781        return 0;
 782}
 783
 784static int evsel__process_page_alloc_event(struct evsel *evsel, struct perf_sample *sample)
 785{
 786        u64 page;
 787        unsigned int order = evsel__intval(evsel, sample, "order");
 788        unsigned int gfp_flags = evsel__intval(evsel, sample, "gfp_flags");
 789        unsigned int migrate_type = evsel__intval(evsel, sample,
 790                                                       "migratetype");
 791        u64 bytes = kmem_page_size << order;
 792        u64 callsite;
 793        struct page_stat *pstat;
 794        struct page_stat this = {
 795                .order = order,
 796                .gfp_flags = gfp_flags,
 797                .migrate_type = migrate_type,
 798        };
 799
 800        if (use_pfn)
 801                page = evsel__intval(evsel, sample, "pfn");
 802        else
 803                page = evsel__intval(evsel, sample, "page");
 804
 805        nr_page_allocs++;
 806        total_page_alloc_bytes += bytes;
 807
 808        if (!valid_page(page)) {
 809                nr_page_fails++;
 810                total_page_fail_bytes += bytes;
 811
 812                return 0;
 813        }
 814
 815        if (parse_gfp_flags(evsel, sample, gfp_flags) < 0)
 816                return -1;
 817
 818        callsite = find_callsite(evsel, sample);
 819
 820        /*
 821         * This is to find the current page (with correct gfp flags and
 822         * migrate type) at free event.
 823         */
 824        this.page = page;
 825        pstat = page_stat__findnew_page(&this);
 826        if (pstat == NULL)
 827                return -ENOMEM;
 828
 829        pstat->nr_alloc++;
 830        pstat->alloc_bytes += bytes;
 831        pstat->callsite = callsite;
 832
 833        if (!live_page) {
 834                pstat = page_stat__findnew_alloc(&this);
 835                if (pstat == NULL)
 836                        return -ENOMEM;
 837
 838                pstat->nr_alloc++;
 839                pstat->alloc_bytes += bytes;
 840                pstat->callsite = callsite;
 841        }
 842
 843        this.callsite = callsite;
 844        pstat = page_stat__findnew_caller(&this);
 845        if (pstat == NULL)
 846                return -ENOMEM;
 847
 848        pstat->nr_alloc++;
 849        pstat->alloc_bytes += bytes;
 850
 851        order_stats[order][migrate_type]++;
 852
 853        return 0;
 854}
 855
 856static int evsel__process_page_free_event(struct evsel *evsel, struct perf_sample *sample)
 857{
 858        u64 page;
 859        unsigned int order = evsel__intval(evsel, sample, "order");
 860        u64 bytes = kmem_page_size << order;
 861        struct page_stat *pstat;
 862        struct page_stat this = {
 863                .order = order,
 864        };
 865
 866        if (use_pfn)
 867                page = evsel__intval(evsel, sample, "pfn");
 868        else
 869                page = evsel__intval(evsel, sample, "page");
 870
 871        nr_page_frees++;
 872        total_page_free_bytes += bytes;
 873
 874        this.page = page;
 875        pstat = page_stat__find_page(&this);
 876        if (pstat == NULL) {
 877                pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
 878                          page, order);
 879
 880                nr_page_nomatch++;
 881                total_page_nomatch_bytes += bytes;
 882
 883                return 0;
 884        }
 885
 886        this.gfp_flags = pstat->gfp_flags;
 887        this.migrate_type = pstat->migrate_type;
 888        this.callsite = pstat->callsite;
 889
 890        rb_erase(&pstat->node, &page_live_tree);
 891        free(pstat);
 892
 893        if (live_page) {
 894                order_stats[this.order][this.migrate_type]--;
 895        } else {
 896                pstat = page_stat__find_alloc(&this);
 897                if (pstat == NULL)
 898                        return -ENOMEM;
 899
 900                pstat->nr_free++;
 901                pstat->free_bytes += bytes;
 902        }
 903
 904        pstat = page_stat__find_caller(&this);
 905        if (pstat == NULL)
 906                return -ENOENT;
 907
 908        pstat->nr_free++;
 909        pstat->free_bytes += bytes;
 910
 911        if (live_page) {
 912                pstat->nr_alloc--;
 913                pstat->alloc_bytes -= bytes;
 914
 915                if (pstat->nr_alloc == 0) {
 916                        rb_erase(&pstat->node, &page_caller_tree);
 917                        free(pstat);
 918                }
 919        }
 920
 921        return 0;
 922}
 923
 924static bool perf_kmem__skip_sample(struct perf_sample *sample)
 925{
 926        /* skip sample based on time? */
 927        if (perf_time__skip_sample(&ptime, sample->time))
 928                return true;
 929
 930        return false;
 931}
 932
 933typedef int (*tracepoint_handler)(struct evsel *evsel,
 934                                  struct perf_sample *sample);
 935
 936static int process_sample_event(struct perf_tool *tool __maybe_unused,
 937                                union perf_event *event,
 938                                struct perf_sample *sample,
 939                                struct evsel *evsel,
 940                                struct machine *machine)
 941{
 942        int err = 0;
 943        struct thread *thread = machine__findnew_thread(machine, sample->pid,
 944                                                        sample->tid);
 945
 946        if (thread == NULL) {
 947                pr_debug("problem processing %d event, skipping it.\n",
 948                         event->header.type);
 949                return -1;
 950        }
 951
 952        if (perf_kmem__skip_sample(sample))
 953                return 0;
 954
 955        dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
 956
 957        if (evsel->handler != NULL) {
 958                tracepoint_handler f = evsel->handler;
 959                err = f(evsel, sample);
 960        }
 961
 962        thread__put(thread);
 963
 964        return err;
 965}
 966
 967static struct perf_tool perf_kmem = {
 968        .sample          = process_sample_event,
 969        .comm            = perf_event__process_comm,
 970        .mmap            = perf_event__process_mmap,
 971        .mmap2           = perf_event__process_mmap2,
 972        .namespaces      = perf_event__process_namespaces,
 973        .ordered_events  = true,
 974};
 975
 976static double fragmentation(unsigned long n_req, unsigned long n_alloc)
 977{
 978        if (n_alloc == 0)
 979                return 0.0;
 980        else
 981                return 100.0 - (100.0 * n_req / n_alloc);
 982}
 983
 984static void __print_slab_result(struct rb_root *root,
 985                                struct perf_session *session,
 986                                int n_lines, int is_caller)
 987{
 988        struct rb_node *next;
 989        struct machine *machine = &session->machines.host;
 990
 991        printf("%.105s\n", graph_dotted_line);
 992        printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
 993        printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
 994        printf("%.105s\n", graph_dotted_line);
 995
 996        next = rb_first(root);
 997
 998        while (next && n_lines--) {
 999                struct alloc_stat *data = rb_entry(next, struct alloc_stat,
1000                                                   node);
1001                struct symbol *sym = NULL;
1002                struct map *map;
1003                char buf[BUFSIZ];
1004                u64 addr;
1005
1006                if (is_caller) {
1007                        addr = data->call_site;
1008                        if (!raw_ip)
1009                                sym = machine__find_kernel_symbol(machine, addr, &map);
1010                } else
1011                        addr = data->ptr;
1012
1013                if (sym != NULL)
1014                        snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
1015                                 addr - map->unmap_ip(map, sym->start));
1016                else
1017                        snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
1018                printf(" %-34s |", buf);
1019
1020                printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n",
1021                       (unsigned long long)data->bytes_alloc,
1022                       (unsigned long)data->bytes_alloc / data->hit,
1023                       (unsigned long long)data->bytes_req,
1024                       (unsigned long)data->bytes_req / data->hit,
1025                       (unsigned long)data->hit,
1026                       (unsigned long)data->pingpong,
1027                       fragmentation(data->bytes_req, data->bytes_alloc));
1028
1029                next = rb_next(next);
1030        }
1031
1032        if (n_lines == -1)
1033                printf(" ...                                | ...             | ...             | ...      | ...       | ...   \n");
1034
1035        printf("%.105s\n", graph_dotted_line);
1036}
1037
/*
 * Labels printed in the "Mig.type" column of the page tables, indexed by a
 * page_stat's migrate_type. Every label fits the column's %8s width.
 * NOTE(review): the order presumably mirrors the kernel's migrate type
 * numbering - confirm against the kernel headers.
 */
static const char * const migrate_type_str[] = {
        "UNMOVABL",
        "RECLAIM",
        "MOVABLE",
        "RESERVED",
        "CMA/ISLT",
        "UNKNOWN",
};
1046
1047static void __print_page_alloc_result(struct perf_session *session, int n_lines)
1048{
1049        struct rb_node *next = rb_first(&page_alloc_sorted);
1050        struct machine *machine = &session->machines.host;
1051        const char *format;
1052        int gfp_len = max(strlen("GFP flags"), max_gfp_len);
1053
1054        printf("\n%.105s\n", graph_dotted_line);
1055        printf(" %-16s | %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
1056               use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total",
1057               gfp_len, "GFP flags");
1058        printf("%.105s\n", graph_dotted_line);
1059
1060        if (use_pfn)
1061                format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
1062        else
1063                format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
1064
1065        while (next && n_lines--) {
1066                struct page_stat *data;
1067                struct symbol *sym;
1068                struct map *map;
1069                char buf[32];
1070                char *caller = buf;
1071
1072                data = rb_entry(next, struct page_stat, node);
1073                sym = machine__find_kernel_symbol(machine, data->callsite, &map);
1074                if (sym)
1075                        caller = sym->name;
1076                else
1077                        scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
1078
1079                printf(format, (unsigned long long)data->page,
1080                       (unsigned long long)data->alloc_bytes / 1024,
1081                       data->nr_alloc, data->order,
1082                       migrate_type_str[data->migrate_type],
1083                       gfp_len, compact_gfp_string(data->gfp_flags), caller);
1084
1085                next = rb_next(next);
1086        }
1087
1088        if (n_lines == -1) {
1089                printf(" ...              | ...              | ...       | ...   | ...      | %-*s | ...\n",
1090                       gfp_len, "...");
1091        }
1092
1093        printf("%.105s\n", graph_dotted_line);
1094}
1095
1096static void __print_page_caller_result(struct perf_session *session, int n_lines)
1097{
1098        struct rb_node *next = rb_first(&page_caller_sorted);
1099        struct machine *machine = &session->machines.host;
1100        int gfp_len = max(strlen("GFP flags"), max_gfp_len);
1101
1102        printf("\n%.105s\n", graph_dotted_line);
1103        printf(" %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
1104               live_page ? "Live" : "Total", gfp_len, "GFP flags");
1105        printf("%.105s\n", graph_dotted_line);
1106
1107        while (next && n_lines--) {
1108                struct page_stat *data;
1109                struct symbol *sym;
1110                struct map *map;
1111                char buf[32];
1112                char *caller = buf;
1113
1114                data = rb_entry(next, struct page_stat, node);
1115                sym = machine__find_kernel_symbol(machine, data->callsite, &map);
1116                if (sym)
1117                        caller = sym->name;
1118                else
1119                        scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
1120
1121                printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n",
1122                       (unsigned long long)data->alloc_bytes / 1024,
1123                       data->nr_alloc, data->order,
1124                       migrate_type_str[data->migrate_type],
1125                       gfp_len, compact_gfp_string(data->gfp_flags), caller);
1126
1127                next = rb_next(next);
1128        }
1129
1130        if (n_lines == -1) {
1131                printf(" ...              | ...       | ...   | ...      | %-*s | ...\n",
1132                       gfp_len, "...");
1133        }
1134
1135        printf("%.105s\n", graph_dotted_line);
1136}
1137
1138static void print_gfp_flags(void)
1139{
1140        int i;
1141
1142        printf("#\n");
1143        printf("# GFP flags\n");
1144        printf("# ---------\n");
1145        for (i = 0; i < nr_gfps; i++) {
1146                printf("# %08x: %*s: %s\n", gfps[i].flags,
1147                       (int) max_gfp_len, gfps[i].compact_str,
1148                       gfps[i].human_readable);
1149        }
1150}
1151
1152static void print_slab_summary(void)
1153{
1154        printf("\nSUMMARY (SLAB allocator)");
1155        printf("\n========================\n");
1156        printf("Total bytes requested: %'lu\n", total_requested);
1157        printf("Total bytes allocated: %'lu\n", total_allocated);
1158        printf("Total bytes freed:     %'lu\n", total_freed);
1159        if (total_allocated > total_freed) {
1160                printf("Net total bytes allocated: %'lu\n",
1161                total_allocated - total_freed);
1162        }
1163        printf("Total bytes wasted on internal fragmentation: %'lu\n",
1164               total_allocated - total_requested);
1165        printf("Internal fragmentation: %f%%\n",
1166               fragmentation(total_requested, total_allocated));
1167        printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs);
1168}
1169
1170static void print_page_summary(void)
1171{
1172        int o, m;
1173        u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch;
1174        u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes;
1175
1176        printf("\nSUMMARY (page allocator)");
1177        printf("\n========================\n");
1178        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation requests",
1179               nr_page_allocs, total_page_alloc_bytes / 1024);
1180        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free requests",
1181               nr_page_frees, total_page_free_bytes / 1024);
1182        printf("\n");
1183
1184        printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
1185               nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
1186        printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
1187               nr_page_allocs - nr_alloc_freed,
1188               (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
1189        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free-only requests",
1190               nr_page_nomatch, total_page_nomatch_bytes / 1024);
1191        printf("\n");
1192
1193        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation failures",
1194               nr_page_fails, total_page_fail_bytes / 1024);
1195        printf("\n");
1196
1197        printf("%5s  %12s  %12s  %12s  %12s  %12s\n", "Order",  "Unmovable",
1198               "Reclaimable", "Movable", "Reserved", "CMA/Isolated");
1199        printf("%.5s  %.12s  %.12s  %.12s  %.12s  %.12s\n", graph_dotted_line,
1200               graph_dotted_line, graph_dotted_line, graph_dotted_line,
1201               graph_dotted_line, graph_dotted_line);
1202
1203        for (o = 0; o < MAX_PAGE_ORDER; o++) {
1204                printf("%5d", o);
1205                for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) {
1206                        if (order_stats[o][m])
1207                                printf("  %'12d", order_stats[o][m]);
1208                        else
1209                                printf("  %12c", '.');
1210                }
1211                printf("\n");
1212        }
1213}
1214
1215static void print_slab_result(struct perf_session *session)
1216{
1217        if (caller_flag)
1218                __print_slab_result(&root_caller_sorted, session, caller_lines, 1);
1219        if (alloc_flag)
1220                __print_slab_result(&root_alloc_sorted, session, alloc_lines, 0);
1221        print_slab_summary();
1222}
1223
1224static void print_page_result(struct perf_session *session)
1225{
1226        if (caller_flag || alloc_flag)
1227                print_gfp_flags();
1228        if (caller_flag)
1229                __print_page_caller_result(session, caller_lines);
1230        if (alloc_flag)
1231                __print_page_alloc_result(session, alloc_lines);
1232        print_page_summary();
1233}
1234
1235static void print_result(struct perf_session *session)
1236{
1237        if (kmem_slab)
1238                print_slab_result(session);
1239        if (kmem_page)
1240                print_page_result(session);
1241}
1242
/*
 * Sort-key lists, one per result table. Each holds struct sort_dimension
 * entries in priority order; they are filled by setup_slab_sorting() and
 * setup_page_sorting() and walked by the sort_*_insert() helpers.
 */
static LIST_HEAD(slab_caller_sort);
static LIST_HEAD(slab_alloc_sort);
static LIST_HEAD(page_caller_sort);
static LIST_HEAD(page_alloc_sort);
1247
1248static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
1249                             struct list_head *sort_list)
1250{
1251        struct rb_node **new = &(root->rb_node);
1252        struct rb_node *parent = NULL;
1253        struct sort_dimension *sort;
1254
1255        while (*new) {
1256                struct alloc_stat *this;
1257                int cmp = 0;
1258
1259                this = rb_entry(*new, struct alloc_stat, node);
1260                parent = *new;
1261
1262                list_for_each_entry(sort, sort_list, list) {
1263                        cmp = sort->cmp(data, this);
1264                        if (cmp)
1265                                break;
1266                }
1267
1268                if (cmp > 0)
1269                        new = &((*new)->rb_left);
1270                else
1271                        new = &((*new)->rb_right);
1272        }
1273
1274        rb_link_node(&data->node, parent, new);
1275        rb_insert_color(&data->node, root);
1276}
1277
1278static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted,
1279                               struct list_head *sort_list)
1280{
1281        struct rb_node *node;
1282        struct alloc_stat *data;
1283
1284        for (;;) {
1285                node = rb_first(root);
1286                if (!node)
1287                        break;
1288
1289                rb_erase(node, root);
1290                data = rb_entry(node, struct alloc_stat, node);
1291                sort_slab_insert(root_sorted, data, sort_list);
1292        }
1293}
1294
1295static void sort_page_insert(struct rb_root *root, struct page_stat *data,
1296                             struct list_head *sort_list)
1297{
1298        struct rb_node **new = &root->rb_node;
1299        struct rb_node *parent = NULL;
1300        struct sort_dimension *sort;
1301
1302        while (*new) {
1303                struct page_stat *this;
1304                int cmp = 0;
1305
1306                this = rb_entry(*new, struct page_stat, node);
1307                parent = *new;
1308
1309                list_for_each_entry(sort, sort_list, list) {
1310                        cmp = sort->cmp(data, this);
1311                        if (cmp)
1312                                break;
1313                }
1314
1315                if (cmp > 0)
1316                        new = &parent->rb_left;
1317                else
1318                        new = &parent->rb_right;
1319        }
1320
1321        rb_link_node(&data->node, parent, new);
1322        rb_insert_color(&data->node, root);
1323}
1324
1325static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted,
1326                               struct list_head *sort_list)
1327{
1328        struct rb_node *node;
1329        struct page_stat *data;
1330
1331        for (;;) {
1332                node = rb_first(root);
1333                if (!node)
1334                        break;
1335
1336                rb_erase(node, root);
1337                data = rb_entry(node, struct page_stat, node);
1338                sort_page_insert(root_sorted, data, sort_list);
1339        }
1340}
1341
1342static void sort_result(void)
1343{
1344        if (kmem_slab) {
1345                __sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
1346                                   &slab_alloc_sort);
1347                __sort_slab_result(&root_caller_stat, &root_caller_sorted,
1348                                   &slab_caller_sort);
1349        }
1350        if (kmem_page) {
1351                if (live_page)
1352                        __sort_page_result(&page_live_tree, &page_alloc_sorted,
1353                                           &page_alloc_sort);
1354                else
1355                        __sort_page_result(&page_alloc_tree, &page_alloc_sorted,
1356                                           &page_alloc_sort);
1357
1358                __sort_page_result(&page_caller_tree, &page_caller_sorted,
1359                                   &page_caller_sort);
1360        }
1361}
1362
1363static int __cmd_kmem(struct perf_session *session)
1364{
1365        int err = -EINVAL;
1366        struct evsel *evsel;
1367        const struct evsel_str_handler kmem_tracepoints[] = {
1368                /* slab allocator */
1369                { "kmem:kmalloc",               evsel__process_alloc_event, },
1370                { "kmem:kmem_cache_alloc",      evsel__process_alloc_event, },
1371                { "kmem:kmalloc_node",          evsel__process_alloc_node_event, },
1372                { "kmem:kmem_cache_alloc_node", evsel__process_alloc_node_event, },
1373                { "kmem:kfree",                 evsel__process_free_event, },
1374                { "kmem:kmem_cache_free",       evsel__process_free_event, },
1375                /* page allocator */
1376                { "kmem:mm_page_alloc",         evsel__process_page_alloc_event, },
1377                { "kmem:mm_page_free",          evsel__process_page_free_event, },
1378        };
1379
1380        if (!perf_session__has_traces(session, "kmem record"))
1381                goto out;
1382
1383        if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
1384                pr_err("Initializing perf session tracepoint handlers failed\n");
1385                goto out;
1386        }
1387
1388        evlist__for_each_entry(session->evlist, evsel) {
1389                if (!strcmp(evsel__name(evsel), "kmem:mm_page_alloc") &&
1390                    evsel__field(evsel, "pfn")) {
1391                        use_pfn = true;
1392                        break;
1393                }
1394        }
1395
1396        setup_pager();
1397        err = perf_session__process_events(session);
1398        if (err != 0) {
1399                pr_err("error during process events: %d\n", err);
1400                goto out;
1401        }
1402        sort_result();
1403        print_result(session);
1404out:
1405        return err;
1406}
1407
1408/* slab sort keys */
1409static int ptr_cmp(void *a, void *b)
1410{
1411        struct alloc_stat *l = a;
1412        struct alloc_stat *r = b;
1413
1414        if (l->ptr < r->ptr)
1415                return -1;
1416        else if (l->ptr > r->ptr)
1417                return 1;
1418        return 0;
1419}
1420
1421static struct sort_dimension ptr_sort_dimension = {
1422        .name   = "ptr",
1423        .cmp    = ptr_cmp,
1424};
1425
1426static int slab_callsite_cmp(void *a, void *b)
1427{
1428        struct alloc_stat *l = a;
1429        struct alloc_stat *r = b;
1430
1431        if (l->call_site < r->call_site)
1432                return -1;
1433        else if (l->call_site > r->call_site)
1434                return 1;
1435        return 0;
1436}
1437
1438static struct sort_dimension callsite_sort_dimension = {
1439        .name   = "callsite",
1440        .cmp    = slab_callsite_cmp,
1441};
1442
1443static int hit_cmp(void *a, void *b)
1444{
1445        struct alloc_stat *l = a;
1446        struct alloc_stat *r = b;
1447
1448        if (l->hit < r->hit)
1449                return -1;
1450        else if (l->hit > r->hit)
1451                return 1;
1452        return 0;
1453}
1454
1455static struct sort_dimension hit_sort_dimension = {
1456        .name   = "hit",
1457        .cmp    = hit_cmp,
1458};
1459
1460static int bytes_cmp(void *a, void *b)
1461{
1462        struct alloc_stat *l = a;
1463        struct alloc_stat *r = b;
1464
1465        if (l->bytes_alloc < r->bytes_alloc)
1466                return -1;
1467        else if (l->bytes_alloc > r->bytes_alloc)
1468                return 1;
1469        return 0;
1470}
1471
1472static struct sort_dimension bytes_sort_dimension = {
1473        .name   = "bytes",
1474        .cmp    = bytes_cmp,
1475};
1476
1477static int frag_cmp(void *a, void *b)
1478{
1479        double x, y;
1480        struct alloc_stat *l = a;
1481        struct alloc_stat *r = b;
1482
1483        x = fragmentation(l->bytes_req, l->bytes_alloc);
1484        y = fragmentation(r->bytes_req, r->bytes_alloc);
1485
1486        if (x < y)
1487                return -1;
1488        else if (x > y)
1489                return 1;
1490        return 0;
1491}
1492
1493static struct sort_dimension frag_sort_dimension = {
1494        .name   = "frag",
1495        .cmp    = frag_cmp,
1496};
1497
1498static int pingpong_cmp(void *a, void *b)
1499{
1500        struct alloc_stat *l = a;
1501        struct alloc_stat *r = b;
1502
1503        if (l->pingpong < r->pingpong)
1504                return -1;
1505        else if (l->pingpong > r->pingpong)
1506                return 1;
1507        return 0;
1508}
1509
1510static struct sort_dimension pingpong_sort_dimension = {
1511        .name   = "pingpong",
1512        .cmp    = pingpong_cmp,
1513};
1514
1515/* page sort keys */
1516static int page_cmp(void *a, void *b)
1517{
1518        struct page_stat *l = a;
1519        struct page_stat *r = b;
1520
1521        if (l->page < r->page)
1522                return -1;
1523        else if (l->page > r->page)
1524                return 1;
1525        return 0;
1526}
1527
1528static struct sort_dimension page_sort_dimension = {
1529        .name   = "page",
1530        .cmp    = page_cmp,
1531};
1532
1533static int page_callsite_cmp(void *a, void *b)
1534{
1535        struct page_stat *l = a;
1536        struct page_stat *r = b;
1537
1538        if (l->callsite < r->callsite)
1539                return -1;
1540        else if (l->callsite > r->callsite)
1541                return 1;
1542        return 0;
1543}
1544
1545static struct sort_dimension page_callsite_sort_dimension = {
1546        .name   = "callsite",
1547        .cmp    = page_callsite_cmp,
1548};
1549
1550static int page_hit_cmp(void *a, void *b)
1551{
1552        struct page_stat *l = a;
1553        struct page_stat *r = b;
1554
1555        if (l->nr_alloc < r->nr_alloc)
1556                return -1;
1557        else if (l->nr_alloc > r->nr_alloc)
1558                return 1;
1559        return 0;
1560}
1561
1562static struct sort_dimension page_hit_sort_dimension = {
1563        .name   = "hit",
1564        .cmp    = page_hit_cmp,
1565};
1566
1567static int page_bytes_cmp(void *a, void *b)
1568{
1569        struct page_stat *l = a;
1570        struct page_stat *r = b;
1571
1572        if (l->alloc_bytes < r->alloc_bytes)
1573                return -1;
1574        else if (l->alloc_bytes > r->alloc_bytes)
1575                return 1;
1576        return 0;
1577}
1578
1579static struct sort_dimension page_bytes_sort_dimension = {
1580        .name   = "bytes",
1581        .cmp    = page_bytes_cmp,
1582};
1583
1584static int page_order_cmp(void *a, void *b)
1585{
1586        struct page_stat *l = a;
1587        struct page_stat *r = b;
1588
1589        if (l->order < r->order)
1590                return -1;
1591        else if (l->order > r->order)
1592                return 1;
1593        return 0;
1594}
1595
1596static struct sort_dimension page_order_sort_dimension = {
1597        .name   = "order",
1598        .cmp    = page_order_cmp,
1599};
1600
1601static int migrate_type_cmp(void *a, void *b)
1602{
1603        struct page_stat *l = a;
1604        struct page_stat *r = b;
1605
1606        /* for internal use to find free'd page */
1607        if (l->migrate_type == -1U)
1608                return 0;
1609
1610        if (l->migrate_type < r->migrate_type)
1611                return -1;
1612        else if (l->migrate_type > r->migrate_type)
1613                return 1;
1614        return 0;
1615}
1616
1617static struct sort_dimension migrate_type_sort_dimension = {
1618        .name   = "migtype",
1619        .cmp    = migrate_type_cmp,
1620};
1621
1622static int gfp_flags_cmp(void *a, void *b)
1623{
1624        struct page_stat *l = a;
1625        struct page_stat *r = b;
1626
1627        /* for internal use to find free'd page */
1628        if (l->gfp_flags == -1U)
1629                return 0;
1630
1631        if (l->gfp_flags < r->gfp_flags)
1632                return -1;
1633        else if (l->gfp_flags > r->gfp_flags)
1634                return 1;
1635        return 0;
1636}
1637
1638static struct sort_dimension gfp_flags_sort_dimension = {
1639        .name   = "gfp",
1640        .cmp    = gfp_flags_cmp,
1641};
1642
/*
 * Sort keys accepted for the slab tables; slab_sort_dimension__add()
 * searches this array by name.
 */
static struct sort_dimension *slab_sorts[] = {
        &ptr_sort_dimension,
        &callsite_sort_dimension,
        &hit_sort_dimension,
        &bytes_sort_dimension,
        &frag_sort_dimension,
        &pingpong_sort_dimension,
};
1651
/*
 * Sort keys accepted for the page tables; page_sort_dimension__add()
 * searches this array by name.
 */
static struct sort_dimension *page_sorts[] = {
        &page_sort_dimension,
        &page_callsite_sort_dimension,
        &page_hit_sort_dimension,
        &page_bytes_sort_dimension,
        &page_order_sort_dimension,
        &migrate_type_sort_dimension,
        &gfp_flags_sort_dimension,
};
1661
1662static int slab_sort_dimension__add(const char *tok, struct list_head *list)
1663{
1664        struct sort_dimension *sort;
1665        int i;
1666
1667        for (i = 0; i < (int)ARRAY_SIZE(slab_sorts); i++) {
1668                if (!strcmp(slab_sorts[i]->name, tok)) {
1669                        sort = memdup(slab_sorts[i], sizeof(*slab_sorts[i]));
1670                        if (!sort) {
1671                                pr_err("%s: memdup failed\n", __func__);
1672                                return -1;
1673                        }
1674                        list_add_tail(&sort->list, list);
1675                        return 0;
1676                }
1677        }
1678
1679        return -1;
1680}
1681
1682static int page_sort_dimension__add(const char *tok, struct list_head *list)
1683{
1684        struct sort_dimension *sort;
1685        int i;
1686
1687        for (i = 0; i < (int)ARRAY_SIZE(page_sorts); i++) {
1688                if (!strcmp(page_sorts[i]->name, tok)) {
1689                        sort = memdup(page_sorts[i], sizeof(*page_sorts[i]));
1690                        if (!sort) {
1691                                pr_err("%s: memdup failed\n", __func__);
1692                                return -1;
1693                        }
1694                        list_add_tail(&sort->list, list);
1695                        return 0;
1696                }
1697        }
1698
1699        return -1;
1700}
1701
/*
 * Parse a comma-separated list of slab sort keys and append them to
 * @sort_list in the order given.
 *
 * @sort_list: list receiving one sort_dimension per key
 * @arg:       key list, e.g. "frag,hit,bytes"; it is copied, not modified
 *
 * Returns 0 on success, -1 when the copy cannot be allocated or a key is
 * rejected by slab_sort_dimension__add(). Keys added before a failure stay
 * on the list.
 */
static int setup_slab_sorting(struct list_head *sort_list, const char *arg)
{
        char *tok;
        char *str = strdup(arg);
        char *pos = str;
        int ret = 0;

        if (!str) {
                pr_err("%s: strdup failed\n", __func__);
                return -1;
        }

        /* strsep() writes into the buffer, hence the private copy */
        while ((tok = strsep(&pos, ",")) != NULL) {
                if (slab_sort_dimension__add(tok, sort_list) < 0) {
                        /* terminate the message with a newline */
                        pr_err("Unknown slab --sort key: '%s'\n", tok);
                        ret = -1;
                        break;
                }
        }

        free(str);
        return ret;
}
1727
/*
 * Parse a comma-separated list of page sort keys and append them to
 * @sort_list in the order given.
 *
 * @sort_list: list receiving one sort_dimension per key
 * @arg:       key list, e.g. "bytes,hit"; it is copied, not modified
 *
 * Returns 0 on success, -1 when the copy cannot be allocated or a key is
 * rejected by page_sort_dimension__add(). Keys added before a failure stay
 * on the list.
 */
static int setup_page_sorting(struct list_head *sort_list, const char *arg)
{
        char *tok;
        char *str = strdup(arg);
        char *pos = str;
        int ret = 0;

        if (!str) {
                pr_err("%s: strdup failed\n", __func__);
                return -1;
        }

        /* strsep() writes into the buffer, hence the private copy */
        while ((tok = strsep(&pos, ",")) != NULL) {
                if (page_sort_dimension__add(tok, sort_list) < 0) {
                        /* terminate the message with a newline */
                        pr_err("Unknown page --sort key: '%s'\n", tok);
                        ret = -1;
                        break;
                }
        }

        free(str);
        return ret;
}
1753
1754static int parse_sort_opt(const struct option *opt __maybe_unused,
1755                          const char *arg, int unset __maybe_unused)
1756{
1757        if (!arg)
1758                return -1;
1759
1760        if (kmem_page > kmem_slab ||
1761            (kmem_page == 0 && kmem_slab == 0 && kmem_default == KMEM_PAGE)) {
1762                if (caller_flag > alloc_flag)
1763                        return setup_page_sorting(&page_caller_sort, arg);
1764                else
1765                        return setup_page_sorting(&page_alloc_sort, arg);
1766        } else {
1767                if (caller_flag > alloc_flag)
1768                        return setup_slab_sorting(&slab_caller_sort, arg);
1769                else
1770                        return setup_slab_sorting(&slab_alloc_sort, arg);
1771        }
1772
1773        return 0;
1774}
1775
1776static int parse_caller_opt(const struct option *opt __maybe_unused,
1777                            const char *arg __maybe_unused,
1778                            int unset __maybe_unused)
1779{
1780        caller_flag = (alloc_flag + 1);
1781        return 0;
1782}
1783
1784static int parse_alloc_opt(const struct option *opt __maybe_unused,
1785                           const char *arg __maybe_unused,
1786                           int unset __maybe_unused)
1787{
1788        alloc_flag = (caller_flag + 1);
1789        return 0;
1790}
1791
1792static int parse_slab_opt(const struct option *opt __maybe_unused,
1793                          const char *arg __maybe_unused,
1794                          int unset __maybe_unused)
1795{
1796        kmem_slab = (kmem_page + 1);
1797        return 0;
1798}
1799
1800static int parse_page_opt(const struct option *opt __maybe_unused,
1801                          const char *arg __maybe_unused,
1802                          int unset __maybe_unused)
1803{
1804        kmem_page = (kmem_slab + 1);
1805        return 0;
1806}
1807
1808static int parse_line_opt(const struct option *opt __maybe_unused,
1809                          const char *arg, int unset __maybe_unused)
1810{
1811        int lines;
1812
1813        if (!arg)
1814                return -1;
1815
1816        lines = strtoul(arg, NULL, 10);
1817
1818        if (caller_flag > alloc_flag)
1819                caller_lines = lines;
1820        else
1821                alloc_lines = lines;
1822
1823        return 0;
1824}
1825
1826static int __cmd_record(int argc, const char **argv)
1827{
1828        const char * const record_args[] = {
1829        "record", "-a", "-R", "-c", "1",
1830        };
1831        const char * const slab_events[] = {
1832        "-e", "kmem:kmalloc",
1833        "-e", "kmem:kmalloc_node",
1834        "-e", "kmem:kfree",
1835        "-e", "kmem:kmem_cache_alloc",
1836        "-e", "kmem:kmem_cache_alloc_node",
1837        "-e", "kmem:kmem_cache_free",
1838        };
1839        const char * const page_events[] = {
1840        "-e", "kmem:mm_page_alloc",
1841        "-e", "kmem:mm_page_free",
1842        };
1843        unsigned int rec_argc, i, j;
1844        const char **rec_argv;
1845
1846        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
1847        if (kmem_slab)
1848                rec_argc += ARRAY_SIZE(slab_events);
1849        if (kmem_page)
1850                rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */
1851
1852        rec_argv = calloc(rec_argc + 1, sizeof(char *));
1853
1854        if (rec_argv == NULL)
1855                return -ENOMEM;
1856
1857        for (i = 0; i < ARRAY_SIZE(record_args); i++)
1858                rec_argv[i] = strdup(record_args[i]);
1859
1860        if (kmem_slab) {
1861                for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++)
1862                        rec_argv[i] = strdup(slab_events[j]);
1863        }
1864        if (kmem_page) {
1865                rec_argv[i++] = strdup("-g");
1866
1867                for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
1868                        rec_argv[i] = strdup(page_events[j]);
1869        }
1870
1871        for (j = 1; j < (unsigned int)argc; j++, i++)
1872                rec_argv[i] = argv[j];
1873
1874        return cmd_record(i, rec_argv);
1875}
1876
1877static int kmem_config(const char *var, const char *value, void *cb __maybe_unused)
1878{
1879        if (!strcmp(var, "kmem.default")) {
1880                if (!strcmp(value, "slab"))
1881                        kmem_default = KMEM_SLAB;
1882                else if (!strcmp(value, "page"))
1883                        kmem_default = KMEM_PAGE;
1884                else
1885                        pr_err("invalid default value ('slab' or 'page' required): %s\n",
1886                               value);
1887                return 0;
1888        }
1889
1890        return 0;
1891}
1892
1893int cmd_kmem(int argc, const char **argv)
1894{
1895        const char * const default_slab_sort = "frag,hit,bytes";
1896        const char * const default_page_sort = "bytes,hit";
1897        struct perf_data data = {
1898                .mode = PERF_DATA_MODE_READ,
1899        };
1900        const struct option kmem_options[] = {
1901        OPT_STRING('i', "input", &input_name, "file", "input file name"),
1902        OPT_INCR('v', "verbose", &verbose,
1903                    "be more verbose (show symbol address, etc)"),
1904        OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
1905                           "show per-callsite statistics", parse_caller_opt),
1906        OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
1907                           "show per-allocation statistics", parse_alloc_opt),
1908        OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
1909                     "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, "
1910                     "page, order, migtype, gfp", parse_sort_opt),
1911        OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
1912        OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
1913        OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
1914        OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator",
1915                           parse_slab_opt),
1916        OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
1917                           parse_page_opt),
1918        OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"),
1919        OPT_STRING(0, "time", &time_str, "str",
1920                   "Time span of interest (start,stop)"),
1921        OPT_END()
1922        };
1923        const char *const kmem_subcommands[] = { "record", "stat", NULL };
1924        const char *kmem_usage[] = {
1925                NULL,
1926                NULL
1927        };
1928        struct perf_session *session;
1929        static const char errmsg[] = "No %s allocation events found.  Have you run 'perf kmem record --%s'?\n";
1930        int ret = perf_config(kmem_config, NULL);
1931
1932        if (ret)
1933                return ret;
1934
1935        argc = parse_options_subcommand(argc, argv, kmem_options,
1936                                        kmem_subcommands, kmem_usage,
1937                                        PARSE_OPT_STOP_AT_NON_OPTION);
1938
1939        if (!argc)
1940                usage_with_options(kmem_usage, kmem_options);
1941
1942        if (kmem_slab == 0 && kmem_page == 0) {
1943                if (kmem_default == KMEM_SLAB)
1944                        kmem_slab = 1;
1945                else
1946                        kmem_page = 1;
1947        }
1948
1949        if (!strncmp(argv[0], "rec", 3)) {
1950                symbol__init(NULL);
1951                return __cmd_record(argc, argv);
1952        }
1953
1954        data.path = input_name;
1955
1956        kmem_session = session = perf_session__new(&data, &perf_kmem);
1957        if (IS_ERR(session))
1958                return PTR_ERR(session);
1959
1960        ret = -1;
1961
1962        if (kmem_slab) {
1963                if (!evlist__find_tracepoint_by_name(session->evlist, "kmem:kmalloc")) {
1964                        pr_err(errmsg, "slab", "slab");
1965                        goto out_delete;
1966                }
1967        }
1968
1969        if (kmem_page) {
1970                struct evsel *evsel = evlist__find_tracepoint_by_name(session->evlist, "kmem:mm_page_alloc");
1971
1972                if (evsel == NULL) {
1973                        pr_err(errmsg, "page", "page");
1974                        goto out_delete;
1975                }
1976
1977                kmem_page_size = tep_get_page_size(evsel->tp_format->tep);
1978                symbol_conf.use_callchain = true;
1979        }
1980
1981        symbol__init(&session->header.env);
1982
1983        if (perf_time__parse_str(&ptime, time_str) != 0) {
1984                pr_err("Invalid time string\n");
1985                ret = -EINVAL;
1986                goto out_delete;
1987        }
1988
1989        if (!strcmp(argv[0], "stat")) {
1990                setlocale(LC_ALL, "");
1991
1992                if (cpu__setup_cpunode_map())
1993                        goto out_delete;
1994
1995                if (list_empty(&slab_caller_sort))
1996                        setup_slab_sorting(&slab_caller_sort, default_slab_sort);
1997                if (list_empty(&slab_alloc_sort))
1998                        setup_slab_sorting(&slab_alloc_sort, default_slab_sort);
1999                if (list_empty(&page_caller_sort))
2000                        setup_page_sorting(&page_caller_sort, default_page_sort);
2001                if (list_empty(&page_alloc_sort))
2002                        setup_page_sorting(&page_alloc_sort, default_page_sort);
2003
2004                if (kmem_page) {
2005                        setup_page_sorting(&page_alloc_sort_input,
2006                                           "page,order,migtype,gfp");
2007                        setup_page_sorting(&page_caller_sort_input,
2008                                           "callsite,order,migtype,gfp");
2009                }
2010                ret = __cmd_kmem(session);
2011        } else
2012                usage_with_options(kmem_usage, kmem_options);
2013
2014out_delete:
2015        perf_session__delete(session);
2016
2017        return ret;
2018}
2019
2020