linux/tools/perf/builtin-kmem.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2#include "builtin.h"
   3#include "perf.h"
   4
   5#include "util/dso.h"
   6#include "util/evlist.h"
   7#include "util/evsel.h"
   8#include "util/config.h"
   9#include "util/map.h"
  10#include "util/symbol.h"
  11#include "util/thread.h"
  12#include "util/header.h"
  13#include "util/session.h"
  14#include "util/tool.h"
  15#include "util/callchain.h"
  16#include "util/time-utils.h"
  17#include <linux/err.h>
  18
  19#include <subcmd/pager.h>
  20#include <subcmd/parse-options.h>
  21#include "util/trace-event.h"
  22#include "util/data.h"
  23#include "util/cpumap.h"
  24
  25#include "util/debug.h"
  26#include "util/string2.h"
  27
  28#include <linux/kernel.h>
  29#include <linux/rbtree.h>
  30#include <linux/string.h>
  31#include <linux/zalloc.h>
  32#include <errno.h>
  33#include <inttypes.h>
  34#include <locale.h>
  35#include <regex.h>
  36
  37#include <linux/ctype.h>
  38
/*
 * NOTE(review): presumably select slab and/or page analysis; they are not
 * read or written in the part of the file visible here - confirm at the
 * option-parsing code.
 */
static int      kmem_slab;
static int      kmem_page;

/* Base page size; the page event handlers compute bytes as this value << order. */
static long     kmem_page_size;
static enum {
        KMEM_SLAB,
        KMEM_PAGE,
} kmem_default = KMEM_SLAB;  /* for backward compatibility */

struct alloc_stat;
/*
 * Three-way comparator used for tree lookups: a negative result descends
 * left, a positive result descends right, zero is a match.
 */
typedef int (*sort_fn_t)(void *, void *);

/* NOTE(review): output-selection flags and line limits; consumers are outside this view. */
static int                      alloc_flag;
static int                      caller_flag;

static int                      alloc_lines = -1;
static int                      caller_lines = -1;

static bool                     raw_ip;
  58
/*
 * Per-key slab statistics. The same structure is stored in the tree keyed
 * by allocated pointer and in the tree keyed by call site.
 */
struct alloc_stat {
        u64     call_site;      /* address that issued the allocation */
        u64     ptr;            /* allocated object address */
        u64     bytes_req;      /* accumulated requested bytes */
        u64     bytes_alloc;    /* accumulated allocated bytes */
        u64     last_alloc;     /* bytes_alloc of the most recent allocation; added to total_freed on free */
        u32     hit;            /* number of allocation events folded into this entry */
        u32     pingpong;       /* frees observed on a different CPU than the allocation */

        short   alloc_cpu;      /* CPU of the last allocation; reset to -1 once freed */

        struct rb_node node;
};
  72
/*
 * root_alloc_stat is keyed by allocated pointer, root_caller_stat by call
 * site. NOTE(review): the *_sorted trees are presumably filled when results
 * are sorted for output; that code is outside this view.
 */
static struct rb_root root_alloc_stat;
static struct rb_root root_alloc_sorted;
static struct rb_root root_caller_stat;
static struct rb_root root_caller_sorted;

/* Running slab totals, updated by the alloc and free event handlers. */
static unsigned long total_requested, total_allocated, total_freed;
static unsigned long nr_allocs, nr_cross_allocs;

/* Time filter: samples for which perf_time__skip_sample() reports true are ignored. */
static struct perf_time_interval ptime;
const char *time_str;
  84
  85static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
  86                             int bytes_req, int bytes_alloc, int cpu)
  87{
  88        struct rb_node **node = &root_alloc_stat.rb_node;
  89        struct rb_node *parent = NULL;
  90        struct alloc_stat *data = NULL;
  91
  92        while (*node) {
  93                parent = *node;
  94                data = rb_entry(*node, struct alloc_stat, node);
  95
  96                if (ptr > data->ptr)
  97                        node = &(*node)->rb_right;
  98                else if (ptr < data->ptr)
  99                        node = &(*node)->rb_left;
 100                else
 101                        break;
 102        }
 103
 104        if (data && data->ptr == ptr) {
 105                data->hit++;
 106                data->bytes_req += bytes_req;
 107                data->bytes_alloc += bytes_alloc;
 108        } else {
 109                data = malloc(sizeof(*data));
 110                if (!data) {
 111                        pr_err("%s: malloc failed\n", __func__);
 112                        return -1;
 113                }
 114                data->ptr = ptr;
 115                data->pingpong = 0;
 116                data->hit = 1;
 117                data->bytes_req = bytes_req;
 118                data->bytes_alloc = bytes_alloc;
 119
 120                rb_link_node(&data->node, parent, node);
 121                rb_insert_color(&data->node, &root_alloc_stat);
 122        }
 123        data->call_site = call_site;
 124        data->alloc_cpu = cpu;
 125        data->last_alloc = bytes_alloc;
 126
 127        return 0;
 128}
 129
 130static int insert_caller_stat(unsigned long call_site,
 131                              int bytes_req, int bytes_alloc)
 132{
 133        struct rb_node **node = &root_caller_stat.rb_node;
 134        struct rb_node *parent = NULL;
 135        struct alloc_stat *data = NULL;
 136
 137        while (*node) {
 138                parent = *node;
 139                data = rb_entry(*node, struct alloc_stat, node);
 140
 141                if (call_site > data->call_site)
 142                        node = &(*node)->rb_right;
 143                else if (call_site < data->call_site)
 144                        node = &(*node)->rb_left;
 145                else
 146                        break;
 147        }
 148
 149        if (data && data->call_site == call_site) {
 150                data->hit++;
 151                data->bytes_req += bytes_req;
 152                data->bytes_alloc += bytes_alloc;
 153        } else {
 154                data = malloc(sizeof(*data));
 155                if (!data) {
 156                        pr_err("%s: malloc failed\n", __func__);
 157                        return -1;
 158                }
 159                data->call_site = call_site;
 160                data->pingpong = 0;
 161                data->hit = 1;
 162                data->bytes_req = bytes_req;
 163                data->bytes_alloc = bytes_alloc;
 164
 165                rb_link_node(&data->node, parent, node);
 166                rb_insert_color(&data->node, &root_caller_stat);
 167        }
 168
 169        return 0;
 170}
 171
 172static int perf_evsel__process_alloc_event(struct evsel *evsel,
 173                                           struct perf_sample *sample)
 174{
 175        unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"),
 176                      call_site = perf_evsel__intval(evsel, sample, "call_site");
 177        int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"),
 178            bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");
 179
 180        if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
 181            insert_caller_stat(call_site, bytes_req, bytes_alloc))
 182                return -1;
 183
 184        total_requested += bytes_req;
 185        total_allocated += bytes_alloc;
 186
 187        nr_allocs++;
 188        return 0;
 189}
 190
 191static int perf_evsel__process_alloc_node_event(struct evsel *evsel,
 192                                                struct perf_sample *sample)
 193{
 194        int ret = perf_evsel__process_alloc_event(evsel, sample);
 195
 196        if (!ret) {
 197                int node1 = cpu__get_node(sample->cpu),
 198                    node2 = perf_evsel__intval(evsel, sample, "node");
 199
 200                if (node1 != node2)
 201                        nr_cross_allocs++;
 202        }
 203
 204        return ret;
 205}
 206
 207static int ptr_cmp(void *, void *);
 208static int slab_callsite_cmp(void *, void *);
 209
 210static struct alloc_stat *search_alloc_stat(unsigned long ptr,
 211                                            unsigned long call_site,
 212                                            struct rb_root *root,
 213                                            sort_fn_t sort_fn)
 214{
 215        struct rb_node *node = root->rb_node;
 216        struct alloc_stat key = { .ptr = ptr, .call_site = call_site };
 217
 218        while (node) {
 219                struct alloc_stat *data;
 220                int cmp;
 221
 222                data = rb_entry(node, struct alloc_stat, node);
 223
 224                cmp = sort_fn(&key, data);
 225                if (cmp < 0)
 226                        node = node->rb_left;
 227                else if (cmp > 0)
 228                        node = node->rb_right;
 229                else
 230                        return data;
 231        }
 232        return NULL;
 233}
 234
 235static int perf_evsel__process_free_event(struct evsel *evsel,
 236                                          struct perf_sample *sample)
 237{
 238        unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
 239        struct alloc_stat *s_alloc, *s_caller;
 240
 241        s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
 242        if (!s_alloc)
 243                return 0;
 244
 245        total_freed += s_alloc->last_alloc;
 246
 247        if ((short)sample->cpu != s_alloc->alloc_cpu) {
 248                s_alloc->pingpong++;
 249
 250                s_caller = search_alloc_stat(0, s_alloc->call_site,
 251                                             &root_caller_stat,
 252                                             slab_callsite_cmp);
 253                if (!s_caller)
 254                        return -1;
 255                s_caller->pingpong++;
 256        }
 257        s_alloc->alloc_cpu = -1;
 258
 259        return 0;
 260}
 261
/*
 * Running page totals, updated by the page alloc/free handlers:
 * "fail" counts allocations whose page value is invalid, "nomatch" counts
 * frees of pages that have no entry in the live tree.
 */
static u64 total_page_alloc_bytes;
static u64 total_page_free_bytes;
static u64 total_page_nomatch_bytes;
static u64 total_page_fail_bytes;
static unsigned long nr_page_allocs;
static unsigned long nr_page_frees;
static unsigned long nr_page_fails;
static unsigned long nr_page_nomatch;

/* use_pfn: read the "pfn" field instead of "page" from page events. */
static bool use_pfn;
/* live_page: free events subtract from the caller/order statistics. */
static bool live_page;
static struct perf_session *kmem_session;

#define MAX_MIGRATE_TYPES  6
#define MAX_PAGE_ORDER     11

/* Allocation counters indexed as [order][migrate_type]. */
static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];
 279
/*
 * Page allocation statistics. Instances live in the live-page tree, the
 * per-page allocation tree and the per-caller tree.
 */
struct page_stat {
        struct rb_node  node;
        u64             page;           /* value of the "pfn" or "page" event field */
        u64             callsite;       /* caller address as returned by find_callsite() */
        int             order;          /* allocation order from the event */
        unsigned        gfp_flags;      /* raw "gfp_flags" event field */
        unsigned        migrate_type;   /* raw "migratetype" event field */
        u64             alloc_bytes;    /* accumulated allocated bytes */
        u64             free_bytes;     /* accumulated freed bytes */
        int             nr_alloc;       /* number of allocations accounted */
        int             nr_free;        /* number of frees accounted */
};
 292
/*
 * page_live_tree:   pages currently allocated, keyed by page value.
 * page_alloc_tree:  ordered by the page_alloc_sort_input keys.
 * page_caller_tree: ordered by the page_caller_sort_input keys.
 * NOTE(review): the *_sorted trees are presumably filled for output;
 * that code is outside this view.
 */
static struct rb_root page_live_tree;
static struct rb_root page_alloc_tree;
static struct rb_root page_alloc_sorted;
static struct rb_root page_caller_tree;
static struct rb_root page_caller_sorted;
 298
/* Address range and name of one kernel page-allocation function. */
struct alloc_func {
        u64 start;
        u64 end;
        char *name;     /* points at the symbol's name; not a private copy */
};

/* Array built by build_alloc_func_list(), sorted by start address. */
static int nr_alloc_funcs;
static struct alloc_func *alloc_func_list;
 307
 308static int funcmp(const void *a, const void *b)
 309{
 310        const struct alloc_func *fa = a;
 311        const struct alloc_func *fb = b;
 312
 313        if (fa->start > fb->start)
 314                return 1;
 315        else
 316                return -1;
 317}
 318
 319static int callcmp(const void *a, const void *b)
 320{
 321        const struct alloc_func *fa = a;
 322        const struct alloc_func *fb = b;
 323
 324        if (fb->start <= fa->start && fa->end < fb->end)
 325                return 0;
 326
 327        if (fa->start > fb->start)
 328                return 1;
 329        else
 330                return -1;
 331}
 332
 333static int build_alloc_func_list(void)
 334{
 335        int ret;
 336        struct map *kernel_map;
 337        struct symbol *sym;
 338        struct rb_node *node;
 339        struct alloc_func *func;
 340        struct machine *machine = &kmem_session->machines.host;
 341        regex_t alloc_func_regex;
 342        static const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?";
 343
 344        ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED);
 345        if (ret) {
 346                char err[BUFSIZ];
 347
 348                regerror(ret, &alloc_func_regex, err, sizeof(err));
 349                pr_err("Invalid regex: %s\n%s", pattern, err);
 350                return -EINVAL;
 351        }
 352
 353        kernel_map = machine__kernel_map(machine);
 354        if (map__load(kernel_map) < 0) {
 355                pr_err("cannot load kernel map\n");
 356                return -ENOENT;
 357        }
 358
 359        map__for_each_symbol(kernel_map, sym, node) {
 360                if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0))
 361                        continue;
 362
 363                func = realloc(alloc_func_list,
 364                               (nr_alloc_funcs + 1) * sizeof(*func));
 365                if (func == NULL)
 366                        return -ENOMEM;
 367
 368                pr_debug("alloc func: %s\n", sym->name);
 369                func[nr_alloc_funcs].start = sym->start;
 370                func[nr_alloc_funcs].end   = sym->end;
 371                func[nr_alloc_funcs].name  = sym->name;
 372
 373                alloc_func_list = func;
 374                nr_alloc_funcs++;
 375        }
 376
 377        qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp);
 378
 379        regfree(&alloc_func_regex);
 380        return 0;
 381}
 382
 383/*
 384 * Find first non-memory allocation function from callchain.
 385 * The allocation functions are in the 'alloc_func_list'.
 386 */
 387static u64 find_callsite(struct evsel *evsel, struct perf_sample *sample)
 388{
 389        struct addr_location al;
 390        struct machine *machine = &kmem_session->machines.host;
 391        struct callchain_cursor_node *node;
 392
 393        if (alloc_func_list == NULL) {
 394                if (build_alloc_func_list() < 0)
 395                        goto out;
 396        }
 397
 398        al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
 399        sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);
 400
 401        callchain_cursor_commit(&callchain_cursor);
 402        while (true) {
 403                struct alloc_func key, *caller;
 404                u64 addr;
 405
 406                node = callchain_cursor_current(&callchain_cursor);
 407                if (node == NULL)
 408                        break;
 409
 410                key.start = key.end = node->ip;
 411                caller = bsearch(&key, alloc_func_list, nr_alloc_funcs,
 412                                 sizeof(key), callcmp);
 413                if (!caller) {
 414                        /* found */
 415                        if (node->ms.map)
 416                                addr = map__unmap_ip(node->ms.map, node->ip);
 417                        else
 418                                addr = node->ip;
 419
 420                        return addr;
 421                } else
 422                        pr_debug3("skipping alloc function: %s\n", caller->name);
 423
 424                callchain_cursor_advance(&callchain_cursor);
 425        }
 426
 427out:
 428        pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip);
 429        return sample->ip;
 430}
 431
/* One sort key: its name, its comparator and its link in a sort-key list. */
struct sort_dimension {
        const char              name[20];
        sort_fn_t               cmp;
        struct list_head        list;
};

/*
 * Ordered key lists used to position entries in page_alloc_tree and
 * page_caller_tree respectively; keys are tried in list order until one
 * reports a difference.
 */
static LIST_HEAD(page_alloc_sort_input);
static LIST_HEAD(page_caller_sort_input);
 440
 441static struct page_stat *
 442__page_stat__findnew_page(struct page_stat *pstat, bool create)
 443{
 444        struct rb_node **node = &page_live_tree.rb_node;
 445        struct rb_node *parent = NULL;
 446        struct page_stat *data;
 447
 448        while (*node) {
 449                s64 cmp;
 450
 451                parent = *node;
 452                data = rb_entry(*node, struct page_stat, node);
 453
 454                cmp = data->page - pstat->page;
 455                if (cmp < 0)
 456                        node = &parent->rb_left;
 457                else if (cmp > 0)
 458                        node = &parent->rb_right;
 459                else
 460                        return data;
 461        }
 462
 463        if (!create)
 464                return NULL;
 465
 466        data = zalloc(sizeof(*data));
 467        if (data != NULL) {
 468                data->page = pstat->page;
 469                data->order = pstat->order;
 470                data->gfp_flags = pstat->gfp_flags;
 471                data->migrate_type = pstat->migrate_type;
 472
 473                rb_link_node(&data->node, parent, node);
 474                rb_insert_color(&data->node, &page_live_tree);
 475        }
 476
 477        return data;
 478}
 479
 480static struct page_stat *page_stat__find_page(struct page_stat *pstat)
 481{
 482        return __page_stat__findnew_page(pstat, false);
 483}
 484
 485static struct page_stat *page_stat__findnew_page(struct page_stat *pstat)
 486{
 487        return __page_stat__findnew_page(pstat, true);
 488}
 489
 490static struct page_stat *
 491__page_stat__findnew_alloc(struct page_stat *pstat, bool create)
 492{
 493        struct rb_node **node = &page_alloc_tree.rb_node;
 494        struct rb_node *parent = NULL;
 495        struct page_stat *data;
 496        struct sort_dimension *sort;
 497
 498        while (*node) {
 499                int cmp = 0;
 500
 501                parent = *node;
 502                data = rb_entry(*node, struct page_stat, node);
 503
 504                list_for_each_entry(sort, &page_alloc_sort_input, list) {
 505                        cmp = sort->cmp(pstat, data);
 506                        if (cmp)
 507                                break;
 508                }
 509
 510                if (cmp < 0)
 511                        node = &parent->rb_left;
 512                else if (cmp > 0)
 513                        node = &parent->rb_right;
 514                else
 515                        return data;
 516        }
 517
 518        if (!create)
 519                return NULL;
 520
 521        data = zalloc(sizeof(*data));
 522        if (data != NULL) {
 523                data->page = pstat->page;
 524                data->order = pstat->order;
 525                data->gfp_flags = pstat->gfp_flags;
 526                data->migrate_type = pstat->migrate_type;
 527
 528                rb_link_node(&data->node, parent, node);
 529                rb_insert_color(&data->node, &page_alloc_tree);
 530        }
 531
 532        return data;
 533}
 534
 535static struct page_stat *page_stat__find_alloc(struct page_stat *pstat)
 536{
 537        return __page_stat__findnew_alloc(pstat, false);
 538}
 539
 540static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat)
 541{
 542        return __page_stat__findnew_alloc(pstat, true);
 543}
 544
 545static struct page_stat *
 546__page_stat__findnew_caller(struct page_stat *pstat, bool create)
 547{
 548        struct rb_node **node = &page_caller_tree.rb_node;
 549        struct rb_node *parent = NULL;
 550        struct page_stat *data;
 551        struct sort_dimension *sort;
 552
 553        while (*node) {
 554                int cmp = 0;
 555
 556                parent = *node;
 557                data = rb_entry(*node, struct page_stat, node);
 558
 559                list_for_each_entry(sort, &page_caller_sort_input, list) {
 560                        cmp = sort->cmp(pstat, data);
 561                        if (cmp)
 562                                break;
 563                }
 564
 565                if (cmp < 0)
 566                        node = &parent->rb_left;
 567                else if (cmp > 0)
 568                        node = &parent->rb_right;
 569                else
 570                        return data;
 571        }
 572
 573        if (!create)
 574                return NULL;
 575
 576        data = zalloc(sizeof(*data));
 577        if (data != NULL) {
 578                data->callsite = pstat->callsite;
 579                data->order = pstat->order;
 580                data->gfp_flags = pstat->gfp_flags;
 581                data->migrate_type = pstat->migrate_type;
 582
 583                rb_link_node(&data->node, parent, node);
 584                rb_insert_color(&data->node, &page_caller_tree);
 585        }
 586
 587        return data;
 588}
 589
 590static struct page_stat *page_stat__find_caller(struct page_stat *pstat)
 591{
 592        return __page_stat__findnew_caller(pstat, false);
 593}
 594
 595static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat)
 596{
 597        return __page_stat__findnew_caller(pstat, true);
 598}
 599
 600static bool valid_page(u64 pfn_or_page)
 601{
 602        if (use_pfn && pfn_or_page == -1UL)
 603                return false;
 604        if (!use_pfn && pfn_or_page == 0)
 605                return false;
 606        return true;
 607}
 608
/* One distinct gfp_flags value together with its two printable forms. */
struct gfp_flag {
        unsigned int flags;     /* raw value from the event */
        char *compact_str;      /* abbreviated form built by compact_gfp_flags() */
        char *human_readable;   /* text following "gfp_flags=" in the printed event */
};

/* Table of the gfp values seen so far, kept sorted by flags for bsearch(). */
static struct gfp_flag *gfps;
static int nr_gfps;
 617
 618static int gfpcmp(const void *a, const void *b)
 619{
 620        const struct gfp_flag *fa = a;
 621        const struct gfp_flag *fb = b;
 622
 623        return fa->flags - fb->flags;
 624}
 625
/*
 * see include/trace/events/mmflags.h
 *
 * Maps each gfp flag name, as it appears in the printed event, to the
 * abbreviation used in compact output.
 *
 * NOTE(review): two abbreviations are shared by different flags, so the
 * compact form is ambiguous for them: "NF" (GFP_NOFS and __GFP_NOFAIL)
 * and "R" (__GFP_RETRY_MAYFAIL and __GFP_RECLAIM).
 */
static const struct {
        const char *original;
        const char *compact;
} gfp_compact_table[] = {
        { "GFP_TRANSHUGE",              "THP" },
        { "GFP_TRANSHUGE_LIGHT",        "THL" },
        { "GFP_HIGHUSER_MOVABLE",       "HUM" },
        { "GFP_HIGHUSER",               "HU" },
        { "GFP_USER",                   "U" },
        { "GFP_KERNEL_ACCOUNT",         "KAC" },
        { "GFP_KERNEL",                 "K" },
        { "GFP_NOFS",                   "NF" },
        { "GFP_ATOMIC",                 "A" },
        { "GFP_NOIO",                   "NI" },
        { "GFP_NOWAIT",                 "NW" },
        { "GFP_DMA",                    "D" },
        { "__GFP_HIGHMEM",              "HM" },
        { "GFP_DMA32",                  "D32" },
        { "__GFP_HIGH",                 "H" },
        { "__GFP_ATOMIC",               "_A" },
        { "__GFP_IO",                   "I" },
        { "__GFP_FS",                   "F" },
        { "__GFP_NOWARN",               "NWR" },
        { "__GFP_RETRY_MAYFAIL",        "R" },
        { "__GFP_NOFAIL",               "NF" },
        { "__GFP_NORETRY",              "NR" },
        { "__GFP_COMP",                 "C" },
        { "__GFP_ZERO",                 "Z" },
        { "__GFP_NOMEMALLOC",           "NMA" },
        { "__GFP_MEMALLOC",             "MA" },
        { "__GFP_HARDWALL",             "HW" },
        { "__GFP_THISNODE",             "TN" },
        { "__GFP_RECLAIMABLE",          "RC" },
        { "__GFP_MOVABLE",              "M" },
        { "__GFP_ACCOUNT",              "AC" },
        { "__GFP_WRITE",                "WR" },
        { "__GFP_RECLAIM",              "R" },
        { "__GFP_DIRECT_RECLAIM",       "DR" },
        { "__GFP_KSWAPD_RECLAIM",       "KR" },
};

/* Length of the longest compact string produced so far. */
static size_t max_gfp_len;
 669
 670static char *compact_gfp_flags(char *gfp_flags)
 671{
 672        char *orig_flags = strdup(gfp_flags);
 673        char *new_flags = NULL;
 674        char *str, *pos = NULL;
 675        size_t len = 0;
 676
 677        if (orig_flags == NULL)
 678                return NULL;
 679
 680        str = strtok_r(orig_flags, "|", &pos);
 681        while (str) {
 682                size_t i;
 683                char *new;
 684                const char *cpt;
 685
 686                for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) {
 687                        if (strcmp(gfp_compact_table[i].original, str))
 688                                continue;
 689
 690                        cpt = gfp_compact_table[i].compact;
 691                        new = realloc(new_flags, len + strlen(cpt) + 2);
 692                        if (new == NULL) {
 693                                free(new_flags);
 694                                free(orig_flags);
 695                                return NULL;
 696                        }
 697
 698                        new_flags = new;
 699
 700                        if (!len) {
 701                                strcpy(new_flags, cpt);
 702                        } else {
 703                                strcat(new_flags, "|");
 704                                strcat(new_flags, cpt);
 705                                len++;
 706                        }
 707
 708                        len += strlen(cpt);
 709                }
 710
 711                str = strtok_r(NULL, "|", &pos);
 712        }
 713
 714        if (max_gfp_len < len)
 715                max_gfp_len = len;
 716
 717        free(orig_flags);
 718        return new_flags;
 719}
 720
 721static char *compact_gfp_string(unsigned long gfp_flags)
 722{
 723        struct gfp_flag key = {
 724                .flags = gfp_flags,
 725        };
 726        struct gfp_flag *gfp;
 727
 728        gfp = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp);
 729        if (gfp)
 730                return gfp->compact_str;
 731
 732        return NULL;
 733}
 734
 735static int parse_gfp_flags(struct evsel *evsel, struct perf_sample *sample,
 736                           unsigned int gfp_flags)
 737{
 738        struct tep_record record = {
 739                .cpu = sample->cpu,
 740                .data = sample->raw_data,
 741                .size = sample->raw_size,
 742        };
 743        struct trace_seq seq;
 744        char *str, *pos = NULL;
 745
 746        if (nr_gfps) {
 747                struct gfp_flag key = {
 748                        .flags = gfp_flags,
 749                };
 750
 751                if (bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp))
 752                        return 0;
 753        }
 754
 755        trace_seq_init(&seq);
 756        tep_print_event(evsel->tp_format->tep,
 757                        &seq, &record, "%s", TEP_PRINT_INFO);
 758
 759        str = strtok_r(seq.buffer, " ", &pos);
 760        while (str) {
 761                if (!strncmp(str, "gfp_flags=", 10)) {
 762                        struct gfp_flag *new;
 763
 764                        new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps));
 765                        if (new == NULL)
 766                                return -ENOMEM;
 767
 768                        gfps = new;
 769                        new += nr_gfps++;
 770
 771                        new->flags = gfp_flags;
 772                        new->human_readable = strdup(str + 10);
 773                        new->compact_str = compact_gfp_flags(str + 10);
 774                        if (!new->human_readable || !new->compact_str)
 775                                return -ENOMEM;
 776
 777                        qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp);
 778                }
 779
 780                str = strtok_r(NULL, " ", &pos);
 781        }
 782
 783        trace_seq_destroy(&seq);
 784        return 0;
 785}
 786
 787static int perf_evsel__process_page_alloc_event(struct evsel *evsel,
 788                                                struct perf_sample *sample)
 789{
 790        u64 page;
 791        unsigned int order = perf_evsel__intval(evsel, sample, "order");
 792        unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags");
 793        unsigned int migrate_type = perf_evsel__intval(evsel, sample,
 794                                                       "migratetype");
 795        u64 bytes = kmem_page_size << order;
 796        u64 callsite;
 797        struct page_stat *pstat;
 798        struct page_stat this = {
 799                .order = order,
 800                .gfp_flags = gfp_flags,
 801                .migrate_type = migrate_type,
 802        };
 803
 804        if (use_pfn)
 805                page = perf_evsel__intval(evsel, sample, "pfn");
 806        else
 807                page = perf_evsel__intval(evsel, sample, "page");
 808
 809        nr_page_allocs++;
 810        total_page_alloc_bytes += bytes;
 811
 812        if (!valid_page(page)) {
 813                nr_page_fails++;
 814                total_page_fail_bytes += bytes;
 815
 816                return 0;
 817        }
 818
 819        if (parse_gfp_flags(evsel, sample, gfp_flags) < 0)
 820                return -1;
 821
 822        callsite = find_callsite(evsel, sample);
 823
 824        /*
 825         * This is to find the current page (with correct gfp flags and
 826         * migrate type) at free event.
 827         */
 828        this.page = page;
 829        pstat = page_stat__findnew_page(&this);
 830        if (pstat == NULL)
 831                return -ENOMEM;
 832
 833        pstat->nr_alloc++;
 834        pstat->alloc_bytes += bytes;
 835        pstat->callsite = callsite;
 836
 837        if (!live_page) {
 838                pstat = page_stat__findnew_alloc(&this);
 839                if (pstat == NULL)
 840                        return -ENOMEM;
 841
 842                pstat->nr_alloc++;
 843                pstat->alloc_bytes += bytes;
 844                pstat->callsite = callsite;
 845        }
 846
 847        this.callsite = callsite;
 848        pstat = page_stat__findnew_caller(&this);
 849        if (pstat == NULL)
 850                return -ENOMEM;
 851
 852        pstat->nr_alloc++;
 853        pstat->alloc_bytes += bytes;
 854
 855        order_stats[order][migrate_type]++;
 856
 857        return 0;
 858}
 859
 860static int perf_evsel__process_page_free_event(struct evsel *evsel,
 861                                                struct perf_sample *sample)
 862{
 863        u64 page;
 864        unsigned int order = perf_evsel__intval(evsel, sample, "order");
 865        u64 bytes = kmem_page_size << order;
 866        struct page_stat *pstat;
 867        struct page_stat this = {
 868                .order = order,
 869        };
 870
 871        if (use_pfn)
 872                page = perf_evsel__intval(evsel, sample, "pfn");
 873        else
 874                page = perf_evsel__intval(evsel, sample, "page");
 875
 876        nr_page_frees++;
 877        total_page_free_bytes += bytes;
 878
 879        this.page = page;
 880        pstat = page_stat__find_page(&this);
 881        if (pstat == NULL) {
 882                pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
 883                          page, order);
 884
 885                nr_page_nomatch++;
 886                total_page_nomatch_bytes += bytes;
 887
 888                return 0;
 889        }
 890
 891        this.gfp_flags = pstat->gfp_flags;
 892        this.migrate_type = pstat->migrate_type;
 893        this.callsite = pstat->callsite;
 894
 895        rb_erase(&pstat->node, &page_live_tree);
 896        free(pstat);
 897
 898        if (live_page) {
 899                order_stats[this.order][this.migrate_type]--;
 900        } else {
 901                pstat = page_stat__find_alloc(&this);
 902                if (pstat == NULL)
 903                        return -ENOMEM;
 904
 905                pstat->nr_free++;
 906                pstat->free_bytes += bytes;
 907        }
 908
 909        pstat = page_stat__find_caller(&this);
 910        if (pstat == NULL)
 911                return -ENOENT;
 912
 913        pstat->nr_free++;
 914        pstat->free_bytes += bytes;
 915
 916        if (live_page) {
 917                pstat->nr_alloc--;
 918                pstat->alloc_bytes -= bytes;
 919
 920                if (pstat->nr_alloc == 0) {
 921                        rb_erase(&pstat->node, &page_caller_tree);
 922                        free(pstat);
 923                }
 924        }
 925
 926        return 0;
 927}
 928
 929static bool perf_kmem__skip_sample(struct perf_sample *sample)
 930{
 931        /* skip sample based on time? */
 932        if (perf_time__skip_sample(&ptime, sample->time))
 933                return true;
 934
 935        return false;
 936}
 937
 938typedef int (*tracepoint_handler)(struct evsel *evsel,
 939                                  struct perf_sample *sample);
 940
 941static int process_sample_event(struct perf_tool *tool __maybe_unused,
 942                                union perf_event *event,
 943                                struct perf_sample *sample,
 944                                struct evsel *evsel,
 945                                struct machine *machine)
 946{
 947        int err = 0;
 948        struct thread *thread = machine__findnew_thread(machine, sample->pid,
 949                                                        sample->tid);
 950
 951        if (thread == NULL) {
 952                pr_debug("problem processing %d event, skipping it.\n",
 953                         event->header.type);
 954                return -1;
 955        }
 956
 957        if (perf_kmem__skip_sample(sample))
 958                return 0;
 959
 960        dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
 961
 962        if (evsel->handler != NULL) {
 963                tracepoint_handler f = evsel->handler;
 964                err = f(evsel, sample);
 965        }
 966
 967        thread__put(thread);
 968
 969        return err;
 970}
 971
 972static struct perf_tool perf_kmem = {
 973        .sample          = process_sample_event,
 974        .comm            = perf_event__process_comm,
 975        .mmap            = perf_event__process_mmap,
 976        .mmap2           = perf_event__process_mmap2,
 977        .namespaces      = perf_event__process_namespaces,
 978        .ordered_events  = true,
 979};
 980
 981static double fragmentation(unsigned long n_req, unsigned long n_alloc)
 982{
 983        if (n_alloc == 0)
 984                return 0.0;
 985        else
 986                return 100.0 - (100.0 * n_req / n_alloc);
 987}
 988
 989static void __print_slab_result(struct rb_root *root,
 990                                struct perf_session *session,
 991                                int n_lines, int is_caller)
 992{
 993        struct rb_node *next;
 994        struct machine *machine = &session->machines.host;
 995
 996        printf("%.105s\n", graph_dotted_line);
 997        printf(" %-34s |",  is_caller ? "Callsite": "Alloc Ptr");
 998        printf(" Total_alloc/Per | Total_req/Per   | Hit      | Ping-pong | Frag\n");
 999        printf("%.105s\n", graph_dotted_line);
1000
1001        next = rb_first(root);
1002
1003        while (next && n_lines--) {
1004                struct alloc_stat *data = rb_entry(next, struct alloc_stat,
1005                                                   node);
1006                struct symbol *sym = NULL;
1007                struct map *map;
1008                char buf[BUFSIZ];
1009                u64 addr;
1010
1011                if (is_caller) {
1012                        addr = data->call_site;
1013                        if (!raw_ip)
1014                                sym = machine__find_kernel_symbol(machine, addr, &map);
1015                } else
1016                        addr = data->ptr;
1017
1018                if (sym != NULL)
1019                        snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
1020                                 addr - map->unmap_ip(map, sym->start));
1021                else
1022                        snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
1023                printf(" %-34s |", buf);
1024
1025                printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n",
1026                       (unsigned long long)data->bytes_alloc,
1027                       (unsigned long)data->bytes_alloc / data->hit,
1028                       (unsigned long long)data->bytes_req,
1029                       (unsigned long)data->bytes_req / data->hit,
1030                       (unsigned long)data->hit,
1031                       (unsigned long)data->pingpong,
1032                       fragmentation(data->bytes_req, data->bytes_alloc));
1033
1034                next = rb_next(next);
1035        }
1036
1037        if (n_lines == -1)
1038                printf(" ...                                | ...             | ...             | ...      | ...       | ...   \n");
1039
1040        printf("%.105s\n", graph_dotted_line);
1041}
1042
/*
 * Column labels for page migrate types; the page result tables index this
 * array with page_stat::migrate_type.
 */
static const char * const migrate_type_str[] = {
	[0] = "UNMOVABL",
	[1] = "RECLAIM",
	[2] = "MOVABLE",
	[3] = "RESERVED",
	[4] = "CMA/ISLT",
	[5] = "UNKNOWN",
};
1051
1052static void __print_page_alloc_result(struct perf_session *session, int n_lines)
1053{
1054        struct rb_node *next = rb_first(&page_alloc_sorted);
1055        struct machine *machine = &session->machines.host;
1056        const char *format;
1057        int gfp_len = max(strlen("GFP flags"), max_gfp_len);
1058
1059        printf("\n%.105s\n", graph_dotted_line);
1060        printf(" %-16s | %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
1061               use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total",
1062               gfp_len, "GFP flags");
1063        printf("%.105s\n", graph_dotted_line);
1064
1065        if (use_pfn)
1066                format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
1067        else
1068                format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
1069
1070        while (next && n_lines--) {
1071                struct page_stat *data;
1072                struct symbol *sym;
1073                struct map *map;
1074                char buf[32];
1075                char *caller = buf;
1076
1077                data = rb_entry(next, struct page_stat, node);
1078                sym = machine__find_kernel_symbol(machine, data->callsite, &map);
1079                if (sym)
1080                        caller = sym->name;
1081                else
1082                        scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
1083
1084                printf(format, (unsigned long long)data->page,
1085                       (unsigned long long)data->alloc_bytes / 1024,
1086                       data->nr_alloc, data->order,
1087                       migrate_type_str[data->migrate_type],
1088                       gfp_len, compact_gfp_string(data->gfp_flags), caller);
1089
1090                next = rb_next(next);
1091        }
1092
1093        if (n_lines == -1) {
1094                printf(" ...              | ...              | ...       | ...   | ...      | %-*s | ...\n",
1095                       gfp_len, "...");
1096        }
1097
1098        printf("%.105s\n", graph_dotted_line);
1099}
1100
1101static void __print_page_caller_result(struct perf_session *session, int n_lines)
1102{
1103        struct rb_node *next = rb_first(&page_caller_sorted);
1104        struct machine *machine = &session->machines.host;
1105        int gfp_len = max(strlen("GFP flags"), max_gfp_len);
1106
1107        printf("\n%.105s\n", graph_dotted_line);
1108        printf(" %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
1109               live_page ? "Live" : "Total", gfp_len, "GFP flags");
1110        printf("%.105s\n", graph_dotted_line);
1111
1112        while (next && n_lines--) {
1113                struct page_stat *data;
1114                struct symbol *sym;
1115                struct map *map;
1116                char buf[32];
1117                char *caller = buf;
1118
1119                data = rb_entry(next, struct page_stat, node);
1120                sym = machine__find_kernel_symbol(machine, data->callsite, &map);
1121                if (sym)
1122                        caller = sym->name;
1123                else
1124                        scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
1125
1126                printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n",
1127                       (unsigned long long)data->alloc_bytes / 1024,
1128                       data->nr_alloc, data->order,
1129                       migrate_type_str[data->migrate_type],
1130                       gfp_len, compact_gfp_string(data->gfp_flags), caller);
1131
1132                next = rb_next(next);
1133        }
1134
1135        if (n_lines == -1) {
1136                printf(" ...              | ...       | ...   | ...      | %-*s | ...\n",
1137                       gfp_len, "...");
1138        }
1139
1140        printf("%.105s\n", graph_dotted_line);
1141}
1142
1143static void print_gfp_flags(void)
1144{
1145        int i;
1146
1147        printf("#\n");
1148        printf("# GFP flags\n");
1149        printf("# ---------\n");
1150        for (i = 0; i < nr_gfps; i++) {
1151                printf("# %08x: %*s: %s\n", gfps[i].flags,
1152                       (int) max_gfp_len, gfps[i].compact_str,
1153                       gfps[i].human_readable);
1154        }
1155}
1156
1157static void print_slab_summary(void)
1158{
1159        printf("\nSUMMARY (SLAB allocator)");
1160        printf("\n========================\n");
1161        printf("Total bytes requested: %'lu\n", total_requested);
1162        printf("Total bytes allocated: %'lu\n", total_allocated);
1163        printf("Total bytes freed:     %'lu\n", total_freed);
1164        if (total_allocated > total_freed) {
1165                printf("Net total bytes allocated: %'lu\n",
1166                total_allocated - total_freed);
1167        }
1168        printf("Total bytes wasted on internal fragmentation: %'lu\n",
1169               total_allocated - total_requested);
1170        printf("Internal fragmentation: %f%%\n",
1171               fragmentation(total_requested, total_allocated));
1172        printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs);
1173}
1174
1175static void print_page_summary(void)
1176{
1177        int o, m;
1178        u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch;
1179        u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes;
1180
1181        printf("\nSUMMARY (page allocator)");
1182        printf("\n========================\n");
1183        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation requests",
1184               nr_page_allocs, total_page_alloc_bytes / 1024);
1185        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free requests",
1186               nr_page_frees, total_page_free_bytes / 1024);
1187        printf("\n");
1188
1189        printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
1190               nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
1191        printf("%-30s: %'16"PRIu64"   [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
1192               nr_page_allocs - nr_alloc_freed,
1193               (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
1194        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total free-only requests",
1195               nr_page_nomatch, total_page_nomatch_bytes / 1024);
1196        printf("\n");
1197
1198        printf("%-30s: %'16lu   [ %'16"PRIu64" KB ]\n", "Total allocation failures",
1199               nr_page_fails, total_page_fail_bytes / 1024);
1200        printf("\n");
1201
1202        printf("%5s  %12s  %12s  %12s  %12s  %12s\n", "Order",  "Unmovable",
1203               "Reclaimable", "Movable", "Reserved", "CMA/Isolated");
1204        printf("%.5s  %.12s  %.12s  %.12s  %.12s  %.12s\n", graph_dotted_line,
1205               graph_dotted_line, graph_dotted_line, graph_dotted_line,
1206               graph_dotted_line, graph_dotted_line);
1207
1208        for (o = 0; o < MAX_PAGE_ORDER; o++) {
1209                printf("%5d", o);
1210                for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) {
1211                        if (order_stats[o][m])
1212                                printf("  %'12d", order_stats[o][m]);
1213                        else
1214                                printf("  %12c", '.');
1215                }
1216                printf("\n");
1217        }
1218}
1219
1220static void print_slab_result(struct perf_session *session)
1221{
1222        if (caller_flag)
1223                __print_slab_result(&root_caller_sorted, session, caller_lines, 1);
1224        if (alloc_flag)
1225                __print_slab_result(&root_alloc_sorted, session, alloc_lines, 0);
1226        print_slab_summary();
1227}
1228
1229static void print_page_result(struct perf_session *session)
1230{
1231        if (caller_flag || alloc_flag)
1232                print_gfp_flags();
1233        if (caller_flag)
1234                __print_page_caller_result(session, caller_lines);
1235        if (alloc_flag)
1236                __print_page_alloc_result(session, alloc_lines);
1237        print_page_summary();
1238}
1239
1240static void print_result(struct perf_session *session)
1241{
1242        if (kmem_slab)
1243                print_slab_result(session);
1244        if (kmem_page)
1245                print_page_result(session);
1246}
1247
/* Sort key lists, one per (allocator, table) combination. */
static LIST_HEAD(slab_alloc_sort);
static LIST_HEAD(slab_caller_sort);
static LIST_HEAD(page_alloc_sort);
static LIST_HEAD(page_caller_sort);
1252
1253static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
1254                             struct list_head *sort_list)
1255{
1256        struct rb_node **new = &(root->rb_node);
1257        struct rb_node *parent = NULL;
1258        struct sort_dimension *sort;
1259
1260        while (*new) {
1261                struct alloc_stat *this;
1262                int cmp = 0;
1263
1264                this = rb_entry(*new, struct alloc_stat, node);
1265                parent = *new;
1266
1267                list_for_each_entry(sort, sort_list, list) {
1268                        cmp = sort->cmp(data, this);
1269                        if (cmp)
1270                                break;
1271                }
1272
1273                if (cmp > 0)
1274                        new = &((*new)->rb_left);
1275                else
1276                        new = &((*new)->rb_right);
1277        }
1278
1279        rb_link_node(&data->node, parent, new);
1280        rb_insert_color(&data->node, root);
1281}
1282
1283static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted,
1284                               struct list_head *sort_list)
1285{
1286        struct rb_node *node;
1287        struct alloc_stat *data;
1288
1289        for (;;) {
1290                node = rb_first(root);
1291                if (!node)
1292                        break;
1293
1294                rb_erase(node, root);
1295                data = rb_entry(node, struct alloc_stat, node);
1296                sort_slab_insert(root_sorted, data, sort_list);
1297        }
1298}
1299
1300static void sort_page_insert(struct rb_root *root, struct page_stat *data,
1301                             struct list_head *sort_list)
1302{
1303        struct rb_node **new = &root->rb_node;
1304        struct rb_node *parent = NULL;
1305        struct sort_dimension *sort;
1306
1307        while (*new) {
1308                struct page_stat *this;
1309                int cmp = 0;
1310
1311                this = rb_entry(*new, struct page_stat, node);
1312                parent = *new;
1313
1314                list_for_each_entry(sort, sort_list, list) {
1315                        cmp = sort->cmp(data, this);
1316                        if (cmp)
1317                                break;
1318                }
1319
1320                if (cmp > 0)
1321                        new = &parent->rb_left;
1322                else
1323                        new = &parent->rb_right;
1324        }
1325
1326        rb_link_node(&data->node, parent, new);
1327        rb_insert_color(&data->node, root);
1328}
1329
1330static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted,
1331                               struct list_head *sort_list)
1332{
1333        struct rb_node *node;
1334        struct page_stat *data;
1335
1336        for (;;) {
1337                node = rb_first(root);
1338                if (!node)
1339                        break;
1340
1341                rb_erase(node, root);
1342                data = rb_entry(node, struct page_stat, node);
1343                sort_page_insert(root_sorted, data, sort_list);
1344        }
1345}
1346
1347static void sort_result(void)
1348{
1349        if (kmem_slab) {
1350                __sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
1351                                   &slab_alloc_sort);
1352                __sort_slab_result(&root_caller_stat, &root_caller_sorted,
1353                                   &slab_caller_sort);
1354        }
1355        if (kmem_page) {
1356                if (live_page)
1357                        __sort_page_result(&page_live_tree, &page_alloc_sorted,
1358                                           &page_alloc_sort);
1359                else
1360                        __sort_page_result(&page_alloc_tree, &page_alloc_sorted,
1361                                           &page_alloc_sort);
1362
1363                __sort_page_result(&page_caller_tree, &page_caller_sorted,
1364                                   &page_caller_sort);
1365        }
1366}
1367
1368static int __cmd_kmem(struct perf_session *session)
1369{
1370        int err = -EINVAL;
1371        struct evsel *evsel;
1372        const struct evsel_str_handler kmem_tracepoints[] = {
1373                /* slab allocator */
1374                { "kmem:kmalloc",               perf_evsel__process_alloc_event, },
1375                { "kmem:kmem_cache_alloc",      perf_evsel__process_alloc_event, },
1376                { "kmem:kmalloc_node",          perf_evsel__process_alloc_node_event, },
1377                { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
1378                { "kmem:kfree",                 perf_evsel__process_free_event, },
1379                { "kmem:kmem_cache_free",       perf_evsel__process_free_event, },
1380                /* page allocator */
1381                { "kmem:mm_page_alloc",         perf_evsel__process_page_alloc_event, },
1382                { "kmem:mm_page_free",          perf_evsel__process_page_free_event, },
1383        };
1384
1385        if (!perf_session__has_traces(session, "kmem record"))
1386                goto out;
1387
1388        if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
1389                pr_err("Initializing perf session tracepoint handlers failed\n");
1390                goto out;
1391        }
1392
1393        evlist__for_each_entry(session->evlist, evsel) {
1394                if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") &&
1395                    perf_evsel__field(evsel, "pfn")) {
1396                        use_pfn = true;
1397                        break;
1398                }
1399        }
1400
1401        setup_pager();
1402        err = perf_session__process_events(session);
1403        if (err != 0) {
1404                pr_err("error during process events: %d\n", err);
1405                goto out;
1406        }
1407        sort_result();
1408        print_result(session);
1409out:
1410        return err;
1411}
1412
1413/* slab sort keys */
1414static int ptr_cmp(void *a, void *b)
1415{
1416        struct alloc_stat *l = a;
1417        struct alloc_stat *r = b;
1418
1419        if (l->ptr < r->ptr)
1420                return -1;
1421        else if (l->ptr > r->ptr)
1422                return 1;
1423        return 0;
1424}
1425
1426static struct sort_dimension ptr_sort_dimension = {
1427        .name   = "ptr",
1428        .cmp    = ptr_cmp,
1429};
1430
1431static int slab_callsite_cmp(void *a, void *b)
1432{
1433        struct alloc_stat *l = a;
1434        struct alloc_stat *r = b;
1435
1436        if (l->call_site < r->call_site)
1437                return -1;
1438        else if (l->call_site > r->call_site)
1439                return 1;
1440        return 0;
1441}
1442
1443static struct sort_dimension callsite_sort_dimension = {
1444        .name   = "callsite",
1445        .cmp    = slab_callsite_cmp,
1446};
1447
1448static int hit_cmp(void *a, void *b)
1449{
1450        struct alloc_stat *l = a;
1451        struct alloc_stat *r = b;
1452
1453        if (l->hit < r->hit)
1454                return -1;
1455        else if (l->hit > r->hit)
1456                return 1;
1457        return 0;
1458}
1459
1460static struct sort_dimension hit_sort_dimension = {
1461        .name   = "hit",
1462        .cmp    = hit_cmp,
1463};
1464
1465static int bytes_cmp(void *a, void *b)
1466{
1467        struct alloc_stat *l = a;
1468        struct alloc_stat *r = b;
1469
1470        if (l->bytes_alloc < r->bytes_alloc)
1471                return -1;
1472        else if (l->bytes_alloc > r->bytes_alloc)
1473                return 1;
1474        return 0;
1475}
1476
1477static struct sort_dimension bytes_sort_dimension = {
1478        .name   = "bytes",
1479        .cmp    = bytes_cmp,
1480};
1481
1482static int frag_cmp(void *a, void *b)
1483{
1484        double x, y;
1485        struct alloc_stat *l = a;
1486        struct alloc_stat *r = b;
1487
1488        x = fragmentation(l->bytes_req, l->bytes_alloc);
1489        y = fragmentation(r->bytes_req, r->bytes_alloc);
1490
1491        if (x < y)
1492                return -1;
1493        else if (x > y)
1494                return 1;
1495        return 0;
1496}
1497
1498static struct sort_dimension frag_sort_dimension = {
1499        .name   = "frag",
1500        .cmp    = frag_cmp,
1501};
1502
1503static int pingpong_cmp(void *a, void *b)
1504{
1505        struct alloc_stat *l = a;
1506        struct alloc_stat *r = b;
1507
1508        if (l->pingpong < r->pingpong)
1509                return -1;
1510        else if (l->pingpong > r->pingpong)
1511                return 1;
1512        return 0;
1513}
1514
1515static struct sort_dimension pingpong_sort_dimension = {
1516        .name   = "pingpong",
1517        .cmp    = pingpong_cmp,
1518};
1519
1520/* page sort keys */
1521static int page_cmp(void *a, void *b)
1522{
1523        struct page_stat *l = a;
1524        struct page_stat *r = b;
1525
1526        if (l->page < r->page)
1527                return -1;
1528        else if (l->page > r->page)
1529                return 1;
1530        return 0;
1531}
1532
1533static struct sort_dimension page_sort_dimension = {
1534        .name   = "page",
1535        .cmp    = page_cmp,
1536};
1537
1538static int page_callsite_cmp(void *a, void *b)
1539{
1540        struct page_stat *l = a;
1541        struct page_stat *r = b;
1542
1543        if (l->callsite < r->callsite)
1544                return -1;
1545        else if (l->callsite > r->callsite)
1546                return 1;
1547        return 0;
1548}
1549
1550static struct sort_dimension page_callsite_sort_dimension = {
1551        .name   = "callsite",
1552        .cmp    = page_callsite_cmp,
1553};
1554
1555static int page_hit_cmp(void *a, void *b)
1556{
1557        struct page_stat *l = a;
1558        struct page_stat *r = b;
1559
1560        if (l->nr_alloc < r->nr_alloc)
1561                return -1;
1562        else if (l->nr_alloc > r->nr_alloc)
1563                return 1;
1564        return 0;
1565}
1566
1567static struct sort_dimension page_hit_sort_dimension = {
1568        .name   = "hit",
1569        .cmp    = page_hit_cmp,
1570};
1571
1572static int page_bytes_cmp(void *a, void *b)
1573{
1574        struct page_stat *l = a;
1575        struct page_stat *r = b;
1576
1577        if (l->alloc_bytes < r->alloc_bytes)
1578                return -1;
1579        else if (l->alloc_bytes > r->alloc_bytes)
1580                return 1;
1581        return 0;
1582}
1583
1584static struct sort_dimension page_bytes_sort_dimension = {
1585        .name   = "bytes",
1586        .cmp    = page_bytes_cmp,
1587};
1588
1589static int page_order_cmp(void *a, void *b)
1590{
1591        struct page_stat *l = a;
1592        struct page_stat *r = b;
1593
1594        if (l->order < r->order)
1595                return -1;
1596        else if (l->order > r->order)
1597                return 1;
1598        return 0;
1599}
1600
1601static struct sort_dimension page_order_sort_dimension = {
1602        .name   = "order",
1603        .cmp    = page_order_cmp,
1604};
1605
1606static int migrate_type_cmp(void *a, void *b)
1607{
1608        struct page_stat *l = a;
1609        struct page_stat *r = b;
1610
1611        /* for internal use to find free'd page */
1612        if (l->migrate_type == -1U)
1613                return 0;
1614
1615        if (l->migrate_type < r->migrate_type)
1616                return -1;
1617        else if (l->migrate_type > r->migrate_type)
1618                return 1;
1619        return 0;
1620}
1621
1622static struct sort_dimension migrate_type_sort_dimension = {
1623        .name   = "migtype",
1624        .cmp    = migrate_type_cmp,
1625};
1626
1627static int gfp_flags_cmp(void *a, void *b)
1628{
1629        struct page_stat *l = a;
1630        struct page_stat *r = b;
1631
1632        /* for internal use to find free'd page */
1633        if (l->gfp_flags == -1U)
1634                return 0;
1635
1636        if (l->gfp_flags < r->gfp_flags)
1637                return -1;
1638        else if (l->gfp_flags > r->gfp_flags)
1639                return 1;
1640        return 0;
1641}
1642
1643static struct sort_dimension gfp_flags_sort_dimension = {
1644        .name   = "gfp",
1645        .cmp    = gfp_flags_cmp,
1646};
1647
1648static struct sort_dimension *slab_sorts[] = {
1649        &ptr_sort_dimension,
1650        &callsite_sort_dimension,
1651        &hit_sort_dimension,
1652        &bytes_sort_dimension,
1653        &frag_sort_dimension,
1654        &pingpong_sort_dimension,
1655};
1656
1657static struct sort_dimension *page_sorts[] = {
1658        &page_sort_dimension,
1659        &page_callsite_sort_dimension,
1660        &page_hit_sort_dimension,
1661        &page_bytes_sort_dimension,
1662        &page_order_sort_dimension,
1663        &migrate_type_sort_dimension,
1664        &gfp_flags_sort_dimension,
1665};
1666
1667static int slab_sort_dimension__add(const char *tok, struct list_head *list)
1668{
1669        struct sort_dimension *sort;
1670        int i;
1671
1672        for (i = 0; i < (int)ARRAY_SIZE(slab_sorts); i++) {
1673                if (!strcmp(slab_sorts[i]->name, tok)) {
1674                        sort = memdup(slab_sorts[i], sizeof(*slab_sorts[i]));
1675                        if (!sort) {
1676                                pr_err("%s: memdup failed\n", __func__);
1677                                return -1;
1678                        }
1679                        list_add_tail(&sort->list, list);
1680                        return 0;
1681                }
1682        }
1683
1684        return -1;
1685}
1686
1687static int page_sort_dimension__add(const char *tok, struct list_head *list)
1688{
1689        struct sort_dimension *sort;
1690        int i;
1691
1692        for (i = 0; i < (int)ARRAY_SIZE(page_sorts); i++) {
1693                if (!strcmp(page_sorts[i]->name, tok)) {
1694                        sort = memdup(page_sorts[i], sizeof(*page_sorts[i]));
1695                        if (!sort) {
1696                                pr_err("%s: memdup failed\n", __func__);
1697                                return -1;
1698                        }
1699                        list_add_tail(&sort->list, list);
1700                        return 0;
1701                }
1702        }
1703
1704        return -1;
1705}
1706
/*
 * Parse the comma separated slab sort keys in @arg and append them, in
 * order, to @sort_list.
 *
 * @arg is duplicated before tokenizing, so the caller's string is left
 * untouched.
 *
 * Returns 0 on success, -1 when the duplicate cannot be allocated or a
 * key cannot be added (unknown name or allocation failure).
 */
static int setup_slab_sorting(struct list_head *sort_list, const char *arg)
{
	char *tok;
	char *str = strdup(arg);
	char *pos = str;

	if (!str) {
		pr_err("%s: strdup failed\n", __func__);
		return -1;
	}

	while ((tok = strsep(&pos, ",")) != NULL) {
		if (slab_sort_dimension__add(tok, sort_list) < 0) {
			/* terminate the message like every other pr_err() */
			pr_err("Unknown slab --sort key: '%s'\n", tok);
			free(str);
			return -1;
		}
	}

	free(str);
	return 0;
}
1732
/*
 * Parse the comma separated page sort keys in @arg and append them, in
 * order, to @sort_list.
 *
 * @arg is duplicated before tokenizing, so the caller's string is left
 * untouched.
 *
 * Returns 0 on success, -1 when the duplicate cannot be allocated or a
 * key cannot be added (unknown name or allocation failure).
 */
static int setup_page_sorting(struct list_head *sort_list, const char *arg)
{
	char *tok;
	char *str = strdup(arg);
	char *pos = str;

	if (!str) {
		pr_err("%s: strdup failed\n", __func__);
		return -1;
	}

	while ((tok = strsep(&pos, ",")) != NULL) {
		if (page_sort_dimension__add(tok, sort_list) < 0) {
			/* terminate the message like every other pr_err() */
			pr_err("Unknown page --sort key: '%s'\n", tok);
			free(str);
			return -1;
		}
	}

	free(str);
	return 0;
}
1758
1759static int parse_sort_opt(const struct option *opt __maybe_unused,
1760                          const char *arg, int unset __maybe_unused)
1761{
1762        if (!arg)
1763                return -1;
1764
1765        if (kmem_page > kmem_slab ||
1766            (kmem_page == 0 && kmem_slab == 0 && kmem_default == KMEM_PAGE)) {
1767                if (caller_flag > alloc_flag)
1768                        return setup_page_sorting(&page_caller_sort, arg);
1769                else
1770                        return setup_page_sorting(&page_alloc_sort, arg);
1771        } else {
1772                if (caller_flag > alloc_flag)
1773                        return setup_slab_sorting(&slab_caller_sort, arg);
1774                else
1775                        return setup_slab_sorting(&slab_alloc_sort, arg);
1776        }
1777
1778        return 0;
1779}
1780
1781static int parse_caller_opt(const struct option *opt __maybe_unused,
1782                            const char *arg __maybe_unused,
1783                            int unset __maybe_unused)
1784{
1785        caller_flag = (alloc_flag + 1);
1786        return 0;
1787}
1788
1789static int parse_alloc_opt(const struct option *opt __maybe_unused,
1790                           const char *arg __maybe_unused,
1791                           int unset __maybe_unused)
1792{
1793        alloc_flag = (caller_flag + 1);
1794        return 0;
1795}
1796
1797static int parse_slab_opt(const struct option *opt __maybe_unused,
1798                          const char *arg __maybe_unused,
1799                          int unset __maybe_unused)
1800{
1801        kmem_slab = (kmem_page + 1);
1802        return 0;
1803}
1804
1805static int parse_page_opt(const struct option *opt __maybe_unused,
1806                          const char *arg __maybe_unused,
1807                          int unset __maybe_unused)
1808{
1809        kmem_page = (kmem_slab + 1);
1810        return 0;
1811}
1812
1813static int parse_line_opt(const struct option *opt __maybe_unused,
1814                          const char *arg, int unset __maybe_unused)
1815{
1816        int lines;
1817
1818        if (!arg)
1819                return -1;
1820
1821        lines = strtoul(arg, NULL, 10);
1822
1823        if (caller_flag > alloc_flag)
1824                caller_lines = lines;
1825        else
1826                alloc_lines = lines;
1827
1828        return 0;
1829}
1830
1831static int __cmd_record(int argc, const char **argv)
1832{
1833        const char * const record_args[] = {
1834        "record", "-a", "-R", "-c", "1",
1835        };
1836        const char * const slab_events[] = {
1837        "-e", "kmem:kmalloc",
1838        "-e", "kmem:kmalloc_node",
1839        "-e", "kmem:kfree",
1840        "-e", "kmem:kmem_cache_alloc",
1841        "-e", "kmem:kmem_cache_alloc_node",
1842        "-e", "kmem:kmem_cache_free",
1843        };
1844        const char * const page_events[] = {
1845        "-e", "kmem:mm_page_alloc",
1846        "-e", "kmem:mm_page_free",
1847        };
1848        unsigned int rec_argc, i, j;
1849        const char **rec_argv;
1850
1851        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
1852        if (kmem_slab)
1853                rec_argc += ARRAY_SIZE(slab_events);
1854        if (kmem_page)
1855                rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */
1856
1857        rec_argv = calloc(rec_argc + 1, sizeof(char *));
1858
1859        if (rec_argv == NULL)
1860                return -ENOMEM;
1861
1862        for (i = 0; i < ARRAY_SIZE(record_args); i++)
1863                rec_argv[i] = strdup(record_args[i]);
1864
1865        if (kmem_slab) {
1866                for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++)
1867                        rec_argv[i] = strdup(slab_events[j]);
1868        }
1869        if (kmem_page) {
1870                rec_argv[i++] = strdup("-g");
1871
1872                for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
1873                        rec_argv[i] = strdup(page_events[j]);
1874        }
1875
1876        for (j = 1; j < (unsigned int)argc; j++, i++)
1877                rec_argv[i] = argv[j];
1878
1879        return cmd_record(i, rec_argv);
1880}
1881
1882static int kmem_config(const char *var, const char *value, void *cb __maybe_unused)
1883{
1884        if (!strcmp(var, "kmem.default")) {
1885                if (!strcmp(value, "slab"))
1886                        kmem_default = KMEM_SLAB;
1887                else if (!strcmp(value, "page"))
1888                        kmem_default = KMEM_PAGE;
1889                else
1890                        pr_err("invalid default value ('slab' or 'page' required): %s\n",
1891                               value);
1892                return 0;
1893        }
1894
1895        return 0;
1896}
1897
1898int cmd_kmem(int argc, const char **argv)
1899{
1900        const char * const default_slab_sort = "frag,hit,bytes";
1901        const char * const default_page_sort = "bytes,hit";
1902        struct perf_data data = {
1903                .mode = PERF_DATA_MODE_READ,
1904        };
1905        const struct option kmem_options[] = {
1906        OPT_STRING('i', "input", &input_name, "file", "input file name"),
1907        OPT_INCR('v', "verbose", &verbose,
1908                    "be more verbose (show symbol address, etc)"),
1909        OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
1910                           "show per-callsite statistics", parse_caller_opt),
1911        OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
1912                           "show per-allocation statistics", parse_alloc_opt),
1913        OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
1914                     "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, "
1915                     "page, order, migtype, gfp", parse_sort_opt),
1916        OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
1917        OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
1918        OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
1919        OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator",
1920                           parse_slab_opt),
1921        OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
1922                           parse_page_opt),
1923        OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"),
1924        OPT_STRING(0, "time", &time_str, "str",
1925                   "Time span of interest (start,stop)"),
1926        OPT_END()
1927        };
1928        const char *const kmem_subcommands[] = { "record", "stat", NULL };
1929        const char *kmem_usage[] = {
1930                NULL,
1931                NULL
1932        };
1933        struct perf_session *session;
1934        static const char errmsg[] = "No %s allocation events found.  Have you run 'perf kmem record --%s'?\n";
1935        int ret = perf_config(kmem_config, NULL);
1936
1937        if (ret)
1938                return ret;
1939
1940        argc = parse_options_subcommand(argc, argv, kmem_options,
1941                                        kmem_subcommands, kmem_usage, 0);
1942
1943        if (!argc)
1944                usage_with_options(kmem_usage, kmem_options);
1945
1946        if (kmem_slab == 0 && kmem_page == 0) {
1947                if (kmem_default == KMEM_SLAB)
1948                        kmem_slab = 1;
1949                else
1950                        kmem_page = 1;
1951        }
1952
1953        if (!strncmp(argv[0], "rec", 3)) {
1954                symbol__init(NULL);
1955                return __cmd_record(argc, argv);
1956        }
1957
1958        data.path = input_name;
1959
1960        kmem_session = session = perf_session__new(&data, false, &perf_kmem);
1961        if (IS_ERR(session))
1962                return PTR_ERR(session);
1963
1964        ret = -1;
1965
1966        if (kmem_slab) {
1967                if (!perf_evlist__find_tracepoint_by_name(session->evlist,
1968                                                          "kmem:kmalloc")) {
1969                        pr_err(errmsg, "slab", "slab");
1970                        goto out_delete;
1971                }
1972        }
1973
1974        if (kmem_page) {
1975                struct evsel *evsel;
1976
1977                evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
1978                                                             "kmem:mm_page_alloc");
1979                if (evsel == NULL) {
1980                        pr_err(errmsg, "page", "page");
1981                        goto out_delete;
1982                }
1983
1984                kmem_page_size = tep_get_page_size(evsel->tp_format->tep);
1985                symbol_conf.use_callchain = true;
1986        }
1987
1988        symbol__init(&session->header.env);
1989
1990        if (perf_time__parse_str(&ptime, time_str) != 0) {
1991                pr_err("Invalid time string\n");
1992                ret = -EINVAL;
1993                goto out_delete;
1994        }
1995
1996        if (!strcmp(argv[0], "stat")) {
1997                setlocale(LC_ALL, "");
1998
1999                if (cpu__setup_cpunode_map())
2000                        goto out_delete;
2001
2002                if (list_empty(&slab_caller_sort))
2003                        setup_slab_sorting(&slab_caller_sort, default_slab_sort);
2004                if (list_empty(&slab_alloc_sort))
2005                        setup_slab_sorting(&slab_alloc_sort, default_slab_sort);
2006                if (list_empty(&page_caller_sort))
2007                        setup_page_sorting(&page_caller_sort, default_page_sort);
2008                if (list_empty(&page_alloc_sort))
2009                        setup_page_sorting(&page_alloc_sort, default_page_sort);
2010
2011                if (kmem_page) {
2012                        setup_page_sorting(&page_alloc_sort_input,
2013                                           "page,order,migtype,gfp");
2014                        setup_page_sorting(&page_caller_sort_input,
2015                                           "callsite,order,migtype,gfp");
2016                }
2017                ret = __cmd_kmem(session);
2018        } else
2019                usage_with_options(kmem_usage, kmem_options);
2020
2021out_delete:
2022        perf_session__delete(session);
2023
2024        return ret;
2025}
2026
2027