linux/arch/nds32/kernel/perf_event_cpu.c
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright (C) 2008-2017 Andes Technology Corporation
   4 *
   5 * Reference ARMv7: Jean Pihet <jpihet@mvista.com>
   6 * 2010 (c) MontaVista Software, LLC.
   7 */
   8
   9#include <linux/perf_event.h>
  10#include <linux/bitmap.h>
  11#include <linux/export.h>
  12#include <linux/kernel.h>
  13#include <linux/of.h>
  14#include <linux/platform_device.h>
  15#include <linux/slab.h>
  16#include <linux/spinlock.h>
  17#include <linux/pm_runtime.h>
  18#include <linux/ftrace.h>
  19#include <linux/uaccess.h>
  20#include <linux/sched/clock.h>
  21#include <linux/percpu-defs.h>
  22
  23#include <asm/pmu.h>
  24#include <asm/irq_regs.h>
  25#include <asm/nds32.h>
  26#include <asm/stacktrace.h>
  27#include <asm/perf_event.h>
  28#include <nds32_intrinsic.h>
  29
  30/* Set at runtime when we know what CPU type we are. */
  31static struct nds32_pmu *cpu_pmu;
  32
  33static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
  34static void nds32_pmu_start(struct nds32_pmu *cpu_pmu);
  35static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu);
  36static struct platform_device_id cpu_pmu_plat_device_ids[] = {
  37        {.name = "nds32-pfm"},
  38        {},
  39};
  40
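/*
 * Cache events use the generic perf hw-cache encoding (a summary of the
 * decoding below, assuming the standard perf ABI layout): attr.config
 * packs the cache type in bits [7:0], the operation in bits [15:8] and
 * the result (access/miss) in bits [23:16].
 */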
  41static int nds32_pmu_map_cache_event(const unsigned int (*cache_map)
  42                                  [PERF_COUNT_HW_CACHE_MAX]
  43                                  [PERF_COUNT_HW_CACHE_OP_MAX]
  44                                  [PERF_COUNT_HW_CACHE_RESULT_MAX], u64 config)
  45{
  46        unsigned int cache_type, cache_op, cache_result, ret;
  47
  48        cache_type = (config >> 0) & 0xff;
  49        if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
  50                return -EINVAL;
  51
  52        cache_op = (config >> 8) & 0xff;
  53        if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
  54                return -EINVAL;
  55
  56        cache_result = (config >> 16) & 0xff;
  57        if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
  58                return -EINVAL;
  59
  60        ret = (int)(*cache_map)[cache_type][cache_op][cache_result];
  61
  62        if (ret == CACHE_OP_UNSUPPORTED)
  63                return -ENOENT;
  64
  65        return ret;
  66}
  67
  68static int
  69nds32_pmu_map_hw_event(const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
  70                       u64 config)
  71{
  72        int mapping;
  73
  74        if (config >= PERF_COUNT_HW_MAX)
  75                return -ENOENT;
  76
  77        mapping = (*event_map)[config];
  78        return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
  79}
  80
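/*
 * Raw event encoding (a sketch based on the decoding below): bits [7:0]
 * of attr.config select an event within a counter group, and
 * (config >> 8) selects the counter group 0, 1 or 2. For example, a
 * hypothetical config of 0x105 would request event 0x05 on counter
 * group 1.
 */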
  81static int nds32_pmu_map_raw_event(u32 raw_event_mask, u64 config)
  82{
  83        int ev_type = (int)(config & raw_event_mask);
  84        int idx = config >> 8;
  85
  86        switch (idx) {
  87        case 0:
  88                ev_type = PFM_OFFSET_MAGIC_0 + ev_type;
  89                if (ev_type >= SPAV3_0_SEL_LAST || ev_type <= SPAV3_0_SEL_BASE)
  90                        return -ENOENT;
  91                break;
  92        case 1:
  93                ev_type = PFM_OFFSET_MAGIC_1 + ev_type;
  94                if (ev_type >= SPAV3_1_SEL_LAST || ev_type <= SPAV3_1_SEL_BASE)
  95                        return -ENOENT;
  96                break;
  97        case 2:
  98                ev_type = PFM_OFFSET_MAGIC_2 + ev_type;
  99                if (ev_type >= SPAV3_2_SEL_LAST || ev_type <= SPAV3_2_SEL_BASE)
 100                        return -ENOENT;
 101                break;
 102        default:
 103                return -ENOENT;
 104        }
 105
 106        return ev_type;
 107}
 108
 109int
 110nds32_pmu_map_event(struct perf_event *event,
 111                    const unsigned int (*event_map)[PERF_COUNT_HW_MAX],
 112                    const unsigned int (*cache_map)
 113                    [PERF_COUNT_HW_CACHE_MAX]
 114                    [PERF_COUNT_HW_CACHE_OP_MAX]
 115                    [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask)
 116{
 117        u64 config = event->attr.config;
 118
 119        switch (event->attr.type) {
 120        case PERF_TYPE_HARDWARE:
 121                return nds32_pmu_map_hw_event(event_map, config);
 122        case PERF_TYPE_HW_CACHE:
 123                return nds32_pmu_map_cache_event(cache_map, config);
 124        case PERF_TYPE_RAW:
 125                return nds32_pmu_map_raw_event(raw_event_mask, config);
 126        }
 127
 128        return -ENOENT;
 129}
 130
 131static int nds32_spav3_map_event(struct perf_event *event)
 132{
 133        return nds32_pmu_map_event(event, &nds32_pfm_perf_map,
 134                                &nds32_pfm_perf_cache_map, SOFTWARE_EVENT_MASK);
 135}
 136
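/*
 * The PFM_CTL_OVF bits act as write-one-to-clear status bits (as the
 * helpers below assume): writing the current control value back with an
 * overflow bit set clears that overflow flag, while writing it back
 * with the overflow bits masked to zero leaves pending status alone.
 */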
 137static inline u32 nds32_pfm_getreset_flags(void)
 138{
 139        /* Read overflow status */
 140        u32 val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 141        u32 old_val = val;
 142
 143        /* Write overflow bit to clear status, and others keep it 0 */
  144        /* Write the overflow bits back to clear them; other bits are preserved */
 145
 146        __nds32__mtsr(val | ov_flag, NDS32_SR_PFM_CTL);
 147
 148        return old_val;
 149}
 150
 151static inline int nds32_pfm_has_overflowed(u32 pfm)
 152{
 153        u32 ov_flag = PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2];
 154
 155        return pfm & ov_flag;
 156}
 157
 158static inline int nds32_pfm_counter_has_overflowed(u32 pfm, int idx)
 159{
 160        u32 mask = 0;
 161
 162        switch (idx) {
 163        case 0:
 164                mask = PFM_CTL_OVF[0];
 165                break;
 166        case 1:
 167                mask = PFM_CTL_OVF[1];
 168                break;
 169        case 2:
 170                mask = PFM_CTL_OVF[2];
 171                break;
 172        default:
 173                pr_err("%s index wrong\n", __func__);
 174                break;
 175        }
 176        return pfm & mask;
 177}
 178
 179/*
 180 * Set the next IRQ period, based on the hwc->period_left value.
 181 * To be called with the event disabled in hw:
 182 */
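/*
 * A worked example (sketch): with max_period = 0xFFFFFFFF and
 * left = 1000, the counter is programmed to (u64)(-1000) & 0xFFFFFFFF =
 * 0xFFFFFC18, so it wraps around (and, with the interrupt enabled,
 * raises the PFM interrupt) after counting 1000 more events.
 */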
 183int nds32_pmu_event_set_period(struct perf_event *event)
 184{
 185        struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 186        struct hw_perf_event *hwc = &event->hw;
 187        s64 left = local64_read(&hwc->period_left);
 188        s64 period = hwc->sample_period;
 189        int ret = 0;
 190
 191        /* The period may have been changed by PERF_EVENT_IOC_PERIOD */
 192        if (unlikely(period != hwc->last_period))
 193                left = period - (hwc->last_period - left);
 194
 195        if (unlikely(left <= -period)) {
 196                left = period;
 197                local64_set(&hwc->period_left, left);
 198                hwc->last_period = period;
 199                ret = 1;
 200        }
 201
 202        if (unlikely(left <= 0)) {
 203                left += period;
 204                local64_set(&hwc->period_left, left);
 205                hwc->last_period = period;
 206                ret = 1;
 207        }
 208
 209        if (left > (s64)nds32_pmu->max_period)
 210                left = nds32_pmu->max_period;
 211
 212        /*
 213         * The hw event starts counting from this event offset,
 214         * mark it to be able to extract future "deltas":
 215         */
 216        local64_set(&hwc->prev_count, (u64)(-left));
 217
 218        nds32_pmu->write_counter(event, (u64)(-left) & nds32_pmu->max_period);
 219
 220        perf_event_update_userpage(event);
 221
 222        return ret;
 223}
 224
 225static irqreturn_t nds32_pmu_handle_irq(int irq_num, void *dev)
 226{
 227        u32 pfm;
 228        struct perf_sample_data data;
 229        struct nds32_pmu *cpu_pmu = (struct nds32_pmu *)dev;
 230        struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
 231        struct pt_regs *regs;
 232        int idx;
 233        /*
 234         * Get and reset the IRQ flags
 235         */
 236        pfm = nds32_pfm_getreset_flags();
 237
 238        /*
 239         * Did an overflow occur?
 240         */
 241        if (!nds32_pfm_has_overflowed(pfm))
 242                return IRQ_NONE;
 243
 244        /*
 245         * Handle the counter(s) overflow(s)
 246         */
 247        regs = get_irq_regs();
 248
 249        nds32_pmu_stop(cpu_pmu);
 250        for (idx = 0; idx < cpu_pmu->num_events; ++idx) {
 251                struct perf_event *event = cpuc->events[idx];
 252                struct hw_perf_event *hwc;
 253
 254                /* Ignore if we don't have an event. */
 255                if (!event)
 256                        continue;
 257
 258                /*
 259                 * We have a single interrupt for all counters. Check that
 260                 * each counter has overflowed before we process it.
 261                 */
 262                if (!nds32_pfm_counter_has_overflowed(pfm, idx))
 263                        continue;
 264
 265                hwc = &event->hw;
 266                nds32_pmu_event_update(event);
 267                perf_sample_data_init(&data, 0, hwc->last_period);
 268                if (!nds32_pmu_event_set_period(event))
 269                        continue;
 270
 271                if (perf_event_overflow(event, &data, regs))
 272                        cpu_pmu->disable(event);
 273        }
 274        nds32_pmu_start(cpu_pmu);
 275        /*
 276         * Handle the pending perf events.
 277         *
 278         * Note: this call *must* be run with interrupts disabled. For
 279         * platforms that can have the PMU interrupts raised as an NMI, this
 280         * will not work.
 281         */
 282        irq_work_run();
 283
 284        return IRQ_HANDLED;
 285}
 286
 287static inline int nds32_pfm_counter_valid(struct nds32_pmu *cpu_pmu, int idx)
 288{
 289        return ((idx >= 0) && (idx < cpu_pmu->num_events));
 290}
 291
 292static inline int nds32_pfm_disable_counter(int idx)
 293{
 294        unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 295        u32 mask = 0;
 296
 297        mask = PFM_CTL_EN[idx];
 298        val &= ~mask;
 299        val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 300        __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
 301        return idx;
 302}
 303
 304/*
 305 * Add an event filter to a given event.
 306 */
 307static int nds32_pmu_set_event_filter(struct hw_perf_event *event,
 308                                      struct perf_event_attr *attr)
 309{
 310        unsigned long config_base = 0;
 311        int idx = event->idx;
 312        unsigned long no_kernel_tracing = 0;
 313        unsigned long no_user_tracing = 0;
 314        /* If index is -1, do not do anything */
 315        if (idx == -1)
 316                return 0;
 317
 318        no_kernel_tracing = PFM_CTL_KS[idx];
 319        no_user_tracing = PFM_CTL_KU[idx];
 320        /*
 321         * Default: enable both kernel and user mode tracing.
 322         */
 323        if (attr->exclude_user)
 324                config_base |= no_user_tracing;
 325
 326        if (attr->exclude_kernel)
 327                config_base |= no_kernel_tracing;
 328
 329        /*
 330         * Install the filter into config_base as this is used to
 331         * construct the event type.
 332         */
 333        event->config_base |= config_base;
 334        return 0;
 335}
 336
 337static inline void nds32_pfm_write_evtsel(int idx, u32 evnum)
 338{
 339        u32 offset = 0;
 340        u32 ori_val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 341        u32 ev_mask = 0;
 342        u32 no_kernel_mask = 0;
 343        u32 no_user_mask = 0;
 344        u32 val;
 345
 346        offset = PFM_CTL_OFFSEL[idx];
 347        /* Clear previous mode selection, and write new one */
 348        no_kernel_mask = PFM_CTL_KS[idx];
 349        no_user_mask = PFM_CTL_KU[idx];
 350        ori_val &= ~no_kernel_mask;
 351        ori_val &= ~no_user_mask;
 352        if (evnum & no_kernel_mask)
 353                ori_val |= no_kernel_mask;
 354
 355        if (evnum & no_user_mask)
 356                ori_val |= no_user_mask;
 357
 358        /* Clear previous event selection */
 359        ev_mask = PFM_CTL_SEL[idx];
 360        ori_val &= ~ev_mask;
 361        evnum &= SOFTWARE_EVENT_MASK;
 362
 363        /* undo the linear mapping */
 364        evnum = get_converted_evet_hw_num(evnum);
 365        val = ori_val | (evnum << offset);
 366        val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 367        __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
 368}
 369
 370static inline int nds32_pfm_enable_counter(int idx)
 371{
 372        unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 373        u32 mask = 0;
 374
 375        mask = PFM_CTL_EN[idx];
 376        val |= mask;
 377        val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 378        __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
 379        return idx;
 380}
 381
 382static inline int nds32_pfm_enable_intens(int idx)
 383{
 384        unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 385        u32 mask = 0;
 386
 387        mask = PFM_CTL_IE[idx];
 388        val |= mask;
 389        val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 390        __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
 391        return idx;
 392}
 393
 394static inline int nds32_pfm_disable_intens(int idx)
 395{
 396        unsigned int val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 397        u32 mask = 0;
 398
 399        mask = PFM_CTL_IE[idx];
 400        val &= ~mask;
 401        val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 402        __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
 403        return idx;
 404}
 405
 406static int event_requires_mode_exclusion(struct perf_event_attr *attr)
 407{
  408        /* NDS32 does not support any other exclusion modes */
 409        return attr->exclude_user || attr->exclude_kernel;
 410}
 411
 412static void nds32_pmu_enable_event(struct perf_event *event)
 413{
 414        unsigned long flags;
 415        unsigned int evnum = 0;
 416        struct hw_perf_event *hwc = &event->hw;
 417        struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
 418        struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 419        int idx = hwc->idx;
 420
 421        if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
 422                pr_err("CPU enabling wrong pfm counter IRQ enable\n");
 423                return;
 424        }
 425
 426        /*
 427         * Enable counter and interrupt, and set the counter to count
 428         * the event that we're interested in.
 429         */
 430        raw_spin_lock_irqsave(&events->pmu_lock, flags);
 431
 432        /*
 433         * Disable counter
 434         */
 435        nds32_pfm_disable_counter(idx);
 436
 437        /*
 438         * Check whether we need to exclude the counter from certain modes.
 439         */
 440        if ((!cpu_pmu->set_event_filter ||
 441             cpu_pmu->set_event_filter(hwc, &event->attr)) &&
 442             event_requires_mode_exclusion(&event->attr)) {
 443                pr_notice
 444                ("NDS32 performance counters do not support mode exclusion\n");
 445                hwc->config_base = 0;
 446        }
 447        /* Write event */
 448        evnum = hwc->config_base;
 449        nds32_pfm_write_evtsel(idx, evnum);
 450
 451        /*
 452         * Enable interrupt for this counter
 453         */
 454        nds32_pfm_enable_intens(idx);
 455
 456        /*
 457         * Enable counter
 458         */
 459        nds32_pfm_enable_counter(idx);
 460
 461        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 462}
 463
 464static void nds32_pmu_disable_event(struct perf_event *event)
 465{
 466        unsigned long flags;
 467        struct hw_perf_event *hwc = &event->hw;
 468        struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
 469        struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 470        int idx = hwc->idx;
 471
 472        if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
 473                pr_err("CPU disabling wrong pfm counter IRQ enable %d\n", idx);
 474                return;
 475        }
 476
 477        /*
 478         * Disable counter and interrupt
 479         */
 480        raw_spin_lock_irqsave(&events->pmu_lock, flags);
 481
 482        /*
 483         * Disable counter
 484         */
 485        nds32_pfm_disable_counter(idx);
 486
 487        /*
 488         * Disable interrupt for this counter
 489         */
 490        nds32_pfm_disable_intens(idx);
 491
 492        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 493}
 494
 495static inline u32 nds32_pmu_read_counter(struct perf_event *event)
 496{
 497        struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
 498        struct hw_perf_event *hwc = &event->hw;
 499        int idx = hwc->idx;
 500        u32 count = 0;
 501
 502        if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
 503                pr_err("CPU reading wrong counter %d\n", idx);
 504        } else {
 505                switch (idx) {
 506                case PFMC0:
 507                        count = __nds32__mfsr(NDS32_SR_PFMC0);
 508                        break;
 509                case PFMC1:
 510                        count = __nds32__mfsr(NDS32_SR_PFMC1);
 511                        break;
 512                case PFMC2:
 513                        count = __nds32__mfsr(NDS32_SR_PFMC2);
 514                        break;
 515                default:
 516                        pr_err
 517                            ("%s: CPU has no performance counters %d\n",
 518                             __func__, idx);
 519                }
 520        }
 521        return count;
 522}
 523
 524static inline void nds32_pmu_write_counter(struct perf_event *event, u32 value)
 525{
 526        struct nds32_pmu *cpu_pmu = to_nds32_pmu(event->pmu);
 527        struct hw_perf_event *hwc = &event->hw;
 528        int idx = hwc->idx;
 529
 530        if (!nds32_pfm_counter_valid(cpu_pmu, idx)) {
 531                pr_err("CPU writing wrong counter %d\n", idx);
 532        } else {
 533                switch (idx) {
 534                case PFMC0:
 535                        __nds32__mtsr_isb(value, NDS32_SR_PFMC0);
 536                        break;
 537                case PFMC1:
 538                        __nds32__mtsr_isb(value, NDS32_SR_PFMC1);
 539                        break;
 540                case PFMC2:
 541                        __nds32__mtsr_isb(value, NDS32_SR_PFMC2);
 542                        break;
 543                default:
 544                        pr_err
 545                            ("%s: CPU has no performance counters %d\n",
 546                             __func__, idx);
 547                }
 548        }
 549}
 550
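/*
 * Counter allocation policy, as implemented below: an event's natural
 * counter is tried first; cycle counting may fall back to counters 0
 * and 1, instruction counting may fall back to counter 1 and then the
 * cycle counter, and every other event must use its own counter.
 */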
 551static int nds32_pmu_get_event_idx(struct pmu_hw_events *cpuc,
 552                                   struct perf_event *event)
 553{
 554        int idx;
 555        struct hw_perf_event *hwc = &event->hw;
  556        /*
  557         * The current implementation maps cycles, instruction count and
  558         * cache-miss events to specific counters.
  559         * However, more than one of the 3 counters is able to count these
  560         * events.
  561         *
  562         * SOFTWARE_EVENT_MASK is the mask used to extract the event number.
  563         * This policy is defined by Jia-Rung and can be changed.
  564         * However, do not exceed 8 bits; this is hardware specific.
  565         * The last number is SPAv3_2_SEL_LAST.
  566         */
 567        unsigned long evtype = hwc->config_base & SOFTWARE_EVENT_MASK;
 568
 569        idx = get_converted_event_idx(evtype);
 570        /*
  571         * Try to get the counter for the corresponding event
 572         */
 573        if (evtype == SPAV3_0_SEL_TOTAL_CYCLES) {
 574                if (!test_and_set_bit(idx, cpuc->used_mask))
 575                        return idx;
 576                if (!test_and_set_bit(NDS32_IDX_COUNTER0, cpuc->used_mask))
 577                        return NDS32_IDX_COUNTER0;
 578                if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
 579                        return NDS32_IDX_COUNTER1;
 580        } else if (evtype == SPAV3_1_SEL_COMPLETED_INSTRUCTION) {
 581                if (!test_and_set_bit(idx, cpuc->used_mask))
 582                        return idx;
 583                else if (!test_and_set_bit(NDS32_IDX_COUNTER1, cpuc->used_mask))
 584                        return NDS32_IDX_COUNTER1;
 585                else if (!test_and_set_bit
 586                         (NDS32_IDX_CYCLE_COUNTER, cpuc->used_mask))
 587                        return NDS32_IDX_CYCLE_COUNTER;
 588        } else {
 589                if (!test_and_set_bit(idx, cpuc->used_mask))
 590                        return idx;
 591        }
 592        return -EAGAIN;
 593}
 594
 595static void nds32_pmu_start(struct nds32_pmu *cpu_pmu)
 596{
 597        unsigned long flags;
 598        unsigned int val;
 599        struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 600
 601        raw_spin_lock_irqsave(&events->pmu_lock, flags);
 602
  603        /* Enable all counters; the NDS32 PFM has 3 counters */
 604        val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 605        val |= (PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
 606        val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 607        __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
 608
 609        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 610}
 611
 612static void nds32_pmu_stop(struct nds32_pmu *cpu_pmu)
 613{
 614        unsigned long flags;
 615        unsigned int val;
 616        struct pmu_hw_events *events = cpu_pmu->get_hw_events();
 617
 618        raw_spin_lock_irqsave(&events->pmu_lock, flags);
 619
  620        /* Disable all counters; the NDS32 PFM has 3 counters */
 621        val = __nds32__mfsr(NDS32_SR_PFM_CTL);
 622        val &= ~(PFM_CTL_EN[0] | PFM_CTL_EN[1] | PFM_CTL_EN[2]);
 623        val &= ~(PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 624        __nds32__mtsr_isb(val, NDS32_SR_PFM_CTL);
 625
 626        raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
 627}
 628
 629static void nds32_pmu_reset(void *info)
 630{
 631        u32 val = 0;
 632
 633        val |= (PFM_CTL_OVF[0] | PFM_CTL_OVF[1] | PFM_CTL_OVF[2]);
 634        __nds32__mtsr(val, NDS32_SR_PFM_CTL);
 635        __nds32__mtsr(0, NDS32_SR_PFM_CTL);
 636        __nds32__mtsr(0, NDS32_SR_PFMC0);
 637        __nds32__mtsr(0, NDS32_SR_PFMC1);
 638        __nds32__mtsr(0, NDS32_SR_PFMC2);
 639}
 640
 641static void nds32_pmu_init(struct nds32_pmu *cpu_pmu)
 642{
 643        cpu_pmu->handle_irq = nds32_pmu_handle_irq;
 644        cpu_pmu->enable = nds32_pmu_enable_event;
 645        cpu_pmu->disable = nds32_pmu_disable_event;
 646        cpu_pmu->read_counter = nds32_pmu_read_counter;
 647        cpu_pmu->write_counter = nds32_pmu_write_counter;
 648        cpu_pmu->get_event_idx = nds32_pmu_get_event_idx;
 649        cpu_pmu->start = nds32_pmu_start;
 650        cpu_pmu->stop = nds32_pmu_stop;
 651        cpu_pmu->reset = nds32_pmu_reset;
 652        cpu_pmu->max_period = 0xFFFFFFFF;       /* Maximum counts */
  653}
 654
 655static u32 nds32_read_num_pfm_events(void)
 656{
  657        /* The NDS32 SPAv3 PMU supports 3 counters */
 658        return 3;
 659}
 660
 661static int device_pmu_init(struct nds32_pmu *cpu_pmu)
 662{
 663        nds32_pmu_init(cpu_pmu);
  664        /*
  665         * This name should be a device-specific name, whatever you like :)
  666         * "PMU" would be a good generic name.
  667         */
 668        cpu_pmu->name = "nds32v3-pmu";
 669        cpu_pmu->map_event = nds32_spav3_map_event;
 670        cpu_pmu->num_events = nds32_read_num_pfm_events();
 671        cpu_pmu->set_event_filter = nds32_pmu_set_event_filter;
 672        return 0;
 673}
 674
 675/*
 676 * CPU PMU identification and probing.
 677 */
 678static int probe_current_pmu(struct nds32_pmu *pmu)
 679{
 680        int ret;
 681
 682        get_cpu();
 683        ret = -ENODEV;
  684        /*
  685         * If there are various CPU types, each with its own PMU,
  686         * initialize with the corresponding one here. Only one PMU
  687         * type is currently handled.
  688         */
  689        ret = device_pmu_init(pmu);
 690        put_cpu();
 691        return ret;
 692}
 693
 694static void nds32_pmu_enable(struct pmu *pmu)
 695{
 696        struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
 697        struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
 698        int enabled = bitmap_weight(hw_events->used_mask,
 699                                    nds32_pmu->num_events);
 700
 701        if (enabled)
 702                nds32_pmu->start(nds32_pmu);
 703}
 704
 705static void nds32_pmu_disable(struct pmu *pmu)
 706{
 707        struct nds32_pmu *nds32_pmu = to_nds32_pmu(pmu);
 708
 709        nds32_pmu->stop(nds32_pmu);
 710}
 711
 712static void nds32_pmu_release_hardware(struct nds32_pmu *nds32_pmu)
 713{
 714        nds32_pmu->free_irq(nds32_pmu);
 715        pm_runtime_put_sync(&nds32_pmu->plat_device->dev);
 716}
 717
 718static irqreturn_t nds32_pmu_dispatch_irq(int irq, void *dev)
 719{
 720        struct nds32_pmu *nds32_pmu = (struct nds32_pmu *)dev;
 721        int ret;
 722        u64 start_clock, finish_clock;
 723
 724        start_clock = local_clock();
 725        ret = nds32_pmu->handle_irq(irq, dev);
 726        finish_clock = local_clock();
 727
 728        perf_sample_event_took(finish_clock - start_clock);
 729        return ret;
 730}
 731
 732static int nds32_pmu_reserve_hardware(struct nds32_pmu *nds32_pmu)
 733{
 734        int err;
 735        struct platform_device *pmu_device = nds32_pmu->plat_device;
 736
 737        if (!pmu_device)
 738                return -ENODEV;
 739
 740        pm_runtime_get_sync(&pmu_device->dev);
 741        err = nds32_pmu->request_irq(nds32_pmu, nds32_pmu_dispatch_irq);
 742        if (err) {
 743                nds32_pmu_release_hardware(nds32_pmu);
 744                return err;
 745        }
 746
 747        return 0;
 748}
 749
 750static int
 751validate_event(struct pmu *pmu, struct pmu_hw_events *hw_events,
 752               struct perf_event *event)
 753{
 754        struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 755
 756        if (is_software_event(event))
 757                return 1;
 758
 759        if (event->pmu != pmu)
 760                return 0;
 761
 762        if (event->state < PERF_EVENT_STATE_OFF)
 763                return 1;
 764
 765        if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
 766                return 1;
 767
 768        return nds32_pmu->get_event_idx(hw_events, event) >= 0;
 769}
 770
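/*
 * Group validation simulates counter allocation on a fake, empty PMU:
 * the group can only be scheduled if the leader, all of its siblings
 * and the new event can each be assigned a counter index.
 */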
 771static int validate_group(struct perf_event *event)
 772{
 773        struct perf_event *sibling, *leader = event->group_leader;
  774        struct pmu_hw_events fake_pmu;
  775
  776        /*
  777         * Initialize the fake PMU. We only need to clear its embedded
  778         * used_mask for the purposes of validation.
  779         */
  780        memset(fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
 781
 782        if (!validate_event(event->pmu, &fake_pmu, leader))
 783                return -EINVAL;
 784
 785        for_each_sibling_event(sibling, leader) {
 786                if (!validate_event(event->pmu, &fake_pmu, sibling))
 787                        return -EINVAL;
 788        }
 789
 790        if (!validate_event(event->pmu, &fake_pmu, event))
 791                return -EINVAL;
 792
 793        return 0;
 794}
 795
 796static int __hw_perf_event_init(struct perf_event *event)
 797{
 798        struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 799        struct hw_perf_event *hwc = &event->hw;
 800        int mapping;
 801
 802        mapping = nds32_pmu->map_event(event);
 803
 804        if (mapping < 0) {
 805                pr_debug("event %x:%llx not supported\n", event->attr.type,
 806                         event->attr.config);
 807                return mapping;
 808        }
 809
 810        /*
 811         * We don't assign an index until we actually place the event onto
 812         * hardware. Use -1 to signify that we haven't decided where to put it
  813         * yet. For SMP systems, each core has its own PMU so we can't do any
 814         * clever allocation or constraints checking at this point.
 815         */
 816        hwc->idx = -1;
 817        hwc->config_base = 0;
 818        hwc->config = 0;
 819        hwc->event_base = 0;
 820
 821        /*
 822         * Check whether we need to exclude the counter from certain modes.
 823         */
 824        if ((!nds32_pmu->set_event_filter ||
 825             nds32_pmu->set_event_filter(hwc, &event->attr)) &&
 826            event_requires_mode_exclusion(&event->attr)) {
 827                pr_debug
 828                        ("NDS performance counters do not support mode exclusion\n");
 829                return -EOPNOTSUPP;
 830        }
 831
 832        /*
 833         * Store the event encoding into the config_base field.
 834         */
 835        hwc->config_base |= (unsigned long)mapping;
 836
 837        if (!hwc->sample_period) {
 838                /*
 839                 * For non-sampling runs, limit the sample_period to half
 840                 * of the counter width. That way, the new counter value
 841                 * is far less likely to overtake the previous one unless
 842                 * you have some serious IRQ latency issues.
 843                 */
 844                hwc->sample_period = nds32_pmu->max_period >> 1;
 845                hwc->last_period = hwc->sample_period;
 846                local64_set(&hwc->period_left, hwc->sample_period);
 847        }
 848
 849        if (event->group_leader != event) {
 850                if (validate_group(event) != 0)
 851                        return -EINVAL;
 852        }
 853
 854        return 0;
 855}
 856
 857static int nds32_pmu_event_init(struct perf_event *event)
 858{
 859        struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 860        int err = 0;
 861        atomic_t *active_events = &nds32_pmu->active_events;
 862
 863        /* does not support taken branch sampling */
 864        if (has_branch_stack(event))
 865                return -EOPNOTSUPP;
 866
 867        if (nds32_pmu->map_event(event) == -ENOENT)
 868                return -ENOENT;
 869
 870        if (!atomic_inc_not_zero(active_events)) {
 871                if (atomic_read(active_events) == 0) {
 872                        /* Register irq handler */
 873                        err = nds32_pmu_reserve_hardware(nds32_pmu);
 874                }
 875
 876                if (!err)
 877                        atomic_inc(active_events);
 878        }
 879
 880        if (err)
 881                return err;
 882
 883        err = __hw_perf_event_init(event);
 884
 885        return err;
 886}
 887
 888static void nds32_start(struct perf_event *event, int flags)
 889{
 890        struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 891        struct hw_perf_event *hwc = &event->hw;
 892        /*
 893         * NDS pmu always has to reprogram the period, so ignore
 894         * PERF_EF_RELOAD, see the comment below.
 895         */
 896        if (flags & PERF_EF_RELOAD)
 897                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
 898
 899        hwc->state = 0;
 900        /* Set the period for the event. */
 901        nds32_pmu_event_set_period(event);
 902
 903        nds32_pmu->enable(event);
 904}
 905
 906static int nds32_pmu_add(struct perf_event *event, int flags)
 907{
 908        struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 909        struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
 910        struct hw_perf_event *hwc = &event->hw;
 911        int idx;
 912        int err = 0;
 913
 914        perf_pmu_disable(event->pmu);
 915
 916        /* If we don't have a space for the counter then finish early. */
 917        idx = nds32_pmu->get_event_idx(hw_events, event);
 918        if (idx < 0) {
 919                err = idx;
 920                goto out;
 921        }
 922
 923        /*
 924         * If there is an event in the counter we are going to use then make
 925         * sure it is disabled.
 926         */
 927        event->hw.idx = idx;
 928        nds32_pmu->disable(event);
 929        hw_events->events[idx] = event;
 930
 931        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
 932        if (flags & PERF_EF_START)
 933                nds32_start(event, PERF_EF_RELOAD);
 934
 935        /* Propagate our changes to the userspace mapping. */
 936        perf_event_update_userpage(event);
 937
 938out:
 939        perf_pmu_enable(event->pmu);
 940        return err;
 941}
 942
 943u64 nds32_pmu_event_update(struct perf_event *event)
 944{
 945        struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 946        struct hw_perf_event *hwc = &event->hw;
 947        u64 delta, prev_raw_count, new_raw_count;
 948
 949again:
 950        prev_raw_count = local64_read(&hwc->prev_count);
 951        new_raw_count = nds32_pmu->read_counter(event);
 952
 953        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 954                            new_raw_count) != prev_raw_count) {
 955                goto again;
 956        }
  957        /*
  958         * Whether the counter overflowed or not, an unsigned subtraction
  959         * will always yield the correct delta
  960         */
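        /*
         * Example (sketch): prev = 0xFFFFFFF0 and new = 0x00000010 give
         * (new - prev) & 0xFFFFFFFF = 0x20, i.e. 32 events, even though
         * the 32-bit counter wrapped in between.
         */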
 961        delta = (new_raw_count - prev_raw_count) & nds32_pmu->max_period;
 962
 963        local64_add(delta, &event->count);
 964        local64_sub(delta, &hwc->period_left);
 965
 966        return new_raw_count;
 967}
 968
 969static void nds32_stop(struct perf_event *event, int flags)
 970{
 971        struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 972        struct hw_perf_event *hwc = &event->hw;
 973        /*
 974         * NDS pmu always has to update the counter, so ignore
 975         * PERF_EF_UPDATE, see comments in nds32_start().
 976         */
 977        if (!(hwc->state & PERF_HES_STOPPED)) {
 978                nds32_pmu->disable(event);
 979                nds32_pmu_event_update(event);
 980                hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
 981        }
 982}
 983
 984static void nds32_pmu_del(struct perf_event *event, int flags)
 985{
 986        struct nds32_pmu *nds32_pmu = to_nds32_pmu(event->pmu);
 987        struct pmu_hw_events *hw_events = nds32_pmu->get_hw_events();
 988        struct hw_perf_event *hwc = &event->hw;
 989        int idx = hwc->idx;
 990
 991        nds32_stop(event, PERF_EF_UPDATE);
 992        hw_events->events[idx] = NULL;
 993        clear_bit(idx, hw_events->used_mask);
 994
 995        perf_event_update_userpage(event);
 996}
 997
 998static void nds32_pmu_read(struct perf_event *event)
 999{
1000        nds32_pmu_event_update(event);
1001}
1002
1003/* Please refer to SPAv3 for more hardware specific details */
1004PMU_FORMAT_ATTR(event, "config:0-63");
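/*
 * With the single "event" format field above, a raw hardware event can
 * be requested from userspace with the usual perf syntax (a sketch,
 * assuming the standard perf tool behaviour), e.g.:
 *
 *   perf stat -e nds32v3-pmu/event=0x105/ ./program
 *
 * where the value follows the encoding handled by
 * nds32_pmu_map_raw_event().
 */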
1005
1006static struct attribute *nds32_arch_formats_attr[] = {
1007        &format_attr_event.attr,
1008        NULL,
1009};
1010
1011static struct attribute_group nds32_pmu_format_group = {
1012        .name = "format",
1013        .attrs = nds32_arch_formats_attr,
1014};
1015
1016static ssize_t nds32_pmu_cpumask_show(struct device *dev,
1017                                      struct device_attribute *attr,
1018                                      char *buf)
1019{
1020        return 0;
1021}
1022
1023static DEVICE_ATTR(cpus, 0444, nds32_pmu_cpumask_show, NULL);
1024
1025static struct attribute *nds32_pmu_common_attrs[] = {
1026        &dev_attr_cpus.attr,
1027        NULL,
1028};
1029
1030static struct attribute_group nds32_pmu_common_group = {
1031        .attrs = nds32_pmu_common_attrs,
1032};
1033
1034static const struct attribute_group *nds32_pmu_attr_groups[] = {
1035        &nds32_pmu_format_group,
1036        &nds32_pmu_common_group,
1037        NULL,
1038};
1039
1040static void nds32_init(struct nds32_pmu *nds32_pmu)
1041{
1042        atomic_set(&nds32_pmu->active_events, 0);
1043
1044        nds32_pmu->pmu = (struct pmu) {
1045                .pmu_enable = nds32_pmu_enable,
1046                .pmu_disable = nds32_pmu_disable,
1047                .attr_groups = nds32_pmu_attr_groups,
1048                .event_init = nds32_pmu_event_init,
1049                .add = nds32_pmu_add,
1050                .del = nds32_pmu_del,
1051                .start = nds32_start,
1052                .stop = nds32_stop,
1053                .read = nds32_pmu_read,
1054        };
1055}
1056
1057int nds32_pmu_register(struct nds32_pmu *nds32_pmu, int type)
1058{
1059        nds32_init(nds32_pmu);
1060        pm_runtime_enable(&nds32_pmu->plat_device->dev);
1061        pr_info("enabled with %s PMU driver, %d counters available\n",
1062                nds32_pmu->name, nds32_pmu->num_events);
1063        return perf_pmu_register(&nds32_pmu->pmu, nds32_pmu->name, type);
1064}
1065
1066static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
1067{
1068        return this_cpu_ptr(&cpu_hw_events);
1069}
1070
1071static int cpu_pmu_request_irq(struct nds32_pmu *cpu_pmu, irq_handler_t handler)
1072{
1073        int err, irq, irqs;
1074        struct platform_device *pmu_device = cpu_pmu->plat_device;
1075
1076        if (!pmu_device)
1077                return -ENODEV;
1078
1079        irqs = min(pmu_device->num_resources, num_possible_cpus());
1080        if (irqs < 1) {
1081                pr_err("no irqs for PMUs defined\n");
1082                return -ENODEV;
1083        }
1084
1085        irq = platform_get_irq(pmu_device, 0);
1086        err = request_irq(irq, handler, IRQF_NOBALANCING, "nds32-pfm",
1087                          cpu_pmu);
1088        if (err) {
1089                pr_err("unable to request IRQ%d for NDS PMU counters\n",
1090                       irq);
1091                return err;
1092        }
1093        return 0;
1094}
1095
1096static void cpu_pmu_free_irq(struct nds32_pmu *cpu_pmu)
1097{
1098        int irq;
1099        struct platform_device *pmu_device = cpu_pmu->plat_device;
1100
1101        irq = platform_get_irq(pmu_device, 0);
1102        if (irq >= 0)
1103                free_irq(irq, cpu_pmu);
1104}
1105
1106static void cpu_pmu_init(struct nds32_pmu *cpu_pmu)
1107{
 1108        int cpu;
 1109
 1110        for_each_possible_cpu(cpu)
 1111                raw_spin_lock_init(&per_cpu(cpu_hw_events, cpu).pmu_lock);
1112
1113        cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events;
1114        cpu_pmu->request_irq = cpu_pmu_request_irq;
1115        cpu_pmu->free_irq = cpu_pmu_free_irq;
1116
1117        /* Ensure the PMU has sane values out of reset. */
1118        if (cpu_pmu->reset)
1119                on_each_cpu(cpu_pmu->reset, cpu_pmu, 1);
1120}
1121
1122static const struct of_device_id cpu_pmu_of_device_ids[] = {
1123        {.compatible = "andestech,nds32v3-pmu",
1124         .data = device_pmu_init},
1125        {},
1126};
1127
1128static int cpu_pmu_device_probe(struct platform_device *pdev)
1129{
1130        const struct of_device_id *of_id;
1131        int (*init_fn)(struct nds32_pmu *nds32_pmu);
1132        struct device_node *node = pdev->dev.of_node;
1133        struct nds32_pmu *pmu;
1134        int ret = -ENODEV;
1135
1136        if (cpu_pmu) {
1137                pr_notice("[perf] attempt to register multiple PMU devices!\n");
1138                return -ENOSPC;
1139        }
1140
1141        pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);
1142        if (!pmu)
1143                return -ENOMEM;
1144
1145        of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node);
1146        if (node && of_id) {
1147                init_fn = of_id->data;
1148                ret = init_fn(pmu);
1149        } else {
1150                ret = probe_current_pmu(pmu);
1151        }
1152
1153        if (ret) {
1154                pr_notice("[perf] failed to probe PMU!\n");
1155                goto out_free;
1156        }
1157
1158        cpu_pmu = pmu;
1159        cpu_pmu->plat_device = pdev;
1160        cpu_pmu_init(cpu_pmu);
1161        ret = nds32_pmu_register(cpu_pmu, PERF_TYPE_RAW);
1162
1163        if (!ret)
1164                return 0;
1165
1166out_free:
1167        pr_notice("[perf] failed to register PMU devices!\n");
1168        kfree(pmu);
1169        return ret;
1170}
1171
1172static struct platform_driver cpu_pmu_driver = {
1173        .driver = {
1174                   .name = "nds32-pfm",
1175                   .of_match_table = cpu_pmu_of_device_ids,
1176                   },
1177        .probe = cpu_pmu_device_probe,
1178        .id_table = cpu_pmu_plat_device_ids,
1179};
1180
1181static int __init register_pmu_driver(void)
1182{
1183        int err = 0;
1184
1185        err = platform_driver_register(&cpu_pmu_driver);
1186        if (err)
1187                pr_notice("[perf] PMU initialization failed\n");
1188        else
1189                pr_notice("[perf] PMU initialization done\n");
1190
1191        return err;
1192}
1193
1194device_initcall(register_pmu_driver);
1195
1196/*
1197 * References: arch/nds32/kernel/traps.c:__dump()
1198 * You will need to know the NDS ABI first.
1199 */
1200static int unwind_frame_kernel(struct stackframe *frame)
1201{
1202        int graph = 0;
1203#ifdef CONFIG_FRAME_POINTER
1204        /* 0x3 means misalignment */
1205        if (!kstack_end((void *)frame->fp) &&
1206            !((unsigned long)frame->fp & 0x3) &&
1207            ((unsigned long)frame->fp >= TASK_SIZE)) {
1208                /*
 1209                 *      The array indices are based on the ABI; the graph
 1210                 *      below illustrates the layout.
1211                 *      Function call procedure: "smw" and "lmw" will always
1212                 *      update SP and FP for you automatically.
1213                 *
1214                 *      Stack                                 Relative Address
1215                 *      |  |                                          0
1216                 *      ----
1217                 *      |LP| <-- SP(before smw)  <-- FP(after smw)   -1
1218                 *      ----
1219                 *      |FP|                                         -2
1220                 *      ----
1221                 *      |  | <-- SP(after smw)                       -3
1222                 */
1223                frame->lp = ((unsigned long *)frame->fp)[-1];
1224                frame->fp = ((unsigned long *)frame->fp)[FP_OFFSET];
1225                /* make sure CONFIG_FUNCTION_GRAPH_TRACER is turned on */
1226                if (__kernel_text_address(frame->lp))
1227                        frame->lp = ftrace_graph_ret_addr
1228                                                (NULL, &graph, frame->lp, NULL);
1229
1230                return 0;
1231        } else {
1232                return -EPERM;
1233        }
1234#else
1235        /*
1236         * You can refer to arch/nds32/kernel/traps.c:__dump()
1237         * Treat "sp" as "fp", but the "sp" is one frame ahead of "fp".
1238         * And, the "sp" is not always correct.
1239         *
1240         *   Stack                                 Relative Address
1241         *   |  |                                          0
1242         *   ----
1243         *   |LP| <-- SP(before smw)                      -1
1244         *   ----
1245         *   |  | <-- SP(after smw)                       -2
1246         *   ----
1247         */
1248        if (!kstack_end((void *)frame->sp)) {
1249                frame->lp = ((unsigned long *)frame->sp)[1];
 1250                /* TODO: how should this be handled when the value of
 1251                 * the first "sp" is not correct?
 1252                 */
1253                if (__kernel_text_address(frame->lp))
1254                        frame->lp = ftrace_graph_ret_addr
 1255                                                (NULL, &graph, frame->lp, NULL);
1256
1257                frame->sp = ((unsigned long *)frame->sp) + 1;
1258
1259                return 0;
1260        } else {
1261                return -EPERM;
1262        }
1263#endif
1264}
1265
1266static void notrace
1267walk_stackframe(struct stackframe *frame,
1268                int (*fn_record)(struct stackframe *, void *),
1269                void *data)
1270{
1271        while (1) {
1272                int ret;
1273
1274                if (fn_record(frame, data))
1275                        break;
1276
1277                ret = unwind_frame_kernel(frame);
1278                if (ret < 0)
1279                        break;
1280        }
1281}
1282
1283/*
1284 * Gets called by walk_stackframe() for every stackframe. This will be called
 1285 * whilst unwinding the stackframe and is like a subroutine return so we use
1286 * the PC.
1287 */
1288static int callchain_trace(struct stackframe *fr, void *data)
1289{
1290        struct perf_callchain_entry_ctx *entry = data;
1291
1292        perf_callchain_store(entry, fr->lp);
1293        return 0;
1294}
1295
1296/*
1297 * Get the return address for a single stackframe and return a pointer to the
1298 * next frame tail.
1299 */
1300static unsigned long
1301user_backtrace(struct perf_callchain_entry_ctx *entry, unsigned long fp)
1302{
1303        struct frame_tail buftail;
1304        unsigned long lp = 0;
1305        unsigned long *user_frame_tail =
1306                (unsigned long *)(fp - (unsigned long)sizeof(buftail));
1307
1308        /* Check accessibility of one struct frame_tail beyond */
1309        if (!access_ok(user_frame_tail, sizeof(buftail)))
1310                return 0;
1311        if (__copy_from_user_inatomic
1312                (&buftail, user_frame_tail, sizeof(buftail)))
1313                return 0;
1314
1315        /*
 1316         * Refer to unwind_frame_kernel() for more illustration
1317         */
1318        lp = buftail.stack_lp;  /* ((unsigned long *)fp)[-1] */
1319        fp = buftail.stack_fp;  /* ((unsigned long *)fp)[FP_OFFSET] */
1320        perf_callchain_store(entry, lp);
1321        return fp;
1322}
1323
1324static unsigned long
1325user_backtrace_opt_size(struct perf_callchain_entry_ctx *entry,
1326                        unsigned long fp)
1327{
1328        struct frame_tail_opt_size buftail;
1329        unsigned long lp = 0;
1330
1331        unsigned long *user_frame_tail =
1332                (unsigned long *)(fp - (unsigned long)sizeof(buftail));
1333
1334        /* Check accessibility of one struct frame_tail beyond */
1335        if (!access_ok(user_frame_tail, sizeof(buftail)))
1336                return 0;
1337        if (__copy_from_user_inatomic
1338                (&buftail, user_frame_tail, sizeof(buftail)))
1339                return 0;
1340
1341        /*
 1342         * Refer to unwind_frame_kernel() for more illustration
1343         */
1344        lp = buftail.stack_lp;  /* ((unsigned long *)fp)[-1] */
1345        fp = buftail.stack_fp;  /* ((unsigned long *)fp)[FP_OFFSET] */
1346
1347        perf_callchain_store(entry, lp);
1348        return fp;
1349}
1350
1351/*
1352 * This will be called when the target is in user mode
1353 * This function will only be called when we use
1354 * "PERF_SAMPLE_CALLCHAIN" in
1355 * kernel/events/core.c:perf_prepare_sample()
1356 *
1357 * How to trigger perf_callchain_[user/kernel] :
1358 * $ perf record -e cpu-clock --call-graph fp ./program
1359 * $ perf report --call-graph
1360 */
1361unsigned long leaf_fp;
1362void
1363perf_callchain_user(struct perf_callchain_entry_ctx *entry,
1364                    struct pt_regs *regs)
1365{
1366        unsigned long fp = 0;
1367        unsigned long gp = 0;
1368        unsigned long lp = 0;
1369        unsigned long sp = 0;
1370        unsigned long *user_frame_tail;
1371
1372        leaf_fp = 0;
1373
1374        if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1375                /* We don't support guest os callchain now */
1376                return;
1377        }
1378
1379        perf_callchain_store(entry, regs->ipc);
1380        fp = regs->fp;
1381        gp = regs->gp;
1382        lp = regs->lp;
1383        sp = regs->sp;
1384        if (entry->nr < PERF_MAX_STACK_DEPTH &&
1385            (unsigned long)fp && !((unsigned long)fp & 0x7) && fp > sp) {
1386                user_frame_tail =
1387                        (unsigned long *)(fp - (unsigned long)sizeof(fp));
1388
1389                if (!access_ok(user_frame_tail, sizeof(fp)))
1390                        return;
1391
1392                if (__copy_from_user_inatomic
1393                        (&leaf_fp, user_frame_tail, sizeof(fp)))
1394                        return;
1395
1396                if (leaf_fp == lp) {
 1397                        /*
 1398                         * Maybe this is a non-leaf function
 1399                         * built with optimize-for-size, or
 1400                         * maybe this is a function built
 1401                         * with optimize-for-size
 1402                         */
1403                        struct frame_tail buftail;
1404
1405                        user_frame_tail =
1406                                (unsigned long *)(fp -
1407                                        (unsigned long)sizeof(buftail));
1408
1409                        if (!access_ok(user_frame_tail, sizeof(buftail)))
1410                                return;
1411
1412                        if (__copy_from_user_inatomic
1413                                (&buftail, user_frame_tail, sizeof(buftail)))
1414                                return;
1415
1416                        if (buftail.stack_fp == gp) {
 1417                                /* non-leaf function built with
 1418                                 * optimize-for-size
 1419                                 */
1420                                struct frame_tail_opt_size buftail_opt_size;
1421
1422                                user_frame_tail =
1423                                        (unsigned long *)(fp - (unsigned long)
1424                                                sizeof(buftail_opt_size));
1425
1426                                if (!access_ok(user_frame_tail,
1427                                               sizeof(buftail_opt_size)))
1428                                        return;
1429
1430                                if (__copy_from_user_inatomic
1431                                   (&buftail_opt_size, user_frame_tail,
1432                                   sizeof(buftail_opt_size)))
1433                                        return;
1434
1435                                perf_callchain_store(entry, lp);
1436                                fp = buftail_opt_size.stack_fp;
1437
1438                                while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
1439                                       (unsigned long)fp &&
1440                                                !((unsigned long)fp & 0x7) &&
1441                                                fp > sp) {
1442                                        sp = fp;
1443                                        fp = user_backtrace_opt_size(entry, fp);
1444                                }
1445
1446                        } else {
 1447                                /* this is a function built
 1448                                 * without optimize-for-size
 1449                                 */
1450                                fp = buftail.stack_fp;
1451                                perf_callchain_store(entry, lp);
1452                                while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
1453                                       (unsigned long)fp &&
1454                                                !((unsigned long)fp & 0x7) &&
1455                                                fp > sp) {
1456                                        sp = fp;
1457                                        fp = user_backtrace(entry, fp);
1458                                }
1459                        }
1460                } else {
 1461                        /* this is a leaf function */
1462                        fp = leaf_fp;
1463                        perf_callchain_store(entry, lp);
1464
 1465                        /* previous function callchain */
1466                        while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
1467                               (unsigned long)fp &&
1468                                   !((unsigned long)fp & 0x7) && fp > sp) {
1469                                sp = fp;
1470                                fp = user_backtrace(entry, fp);
1471                        }
1472                }
1473                return;
1474        }
1475}
1476
1477/* This will be called when the target is in kernel mode */
1478void
1479perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
1480                      struct pt_regs *regs)
1481{
1482        struct stackframe fr;
1483
1484        if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1485                /* We don't support guest os callchain now */
1486                return;
1487        }
1488        fr.fp = regs->fp;
1489        fr.lp = regs->lp;
1490        fr.sp = regs->sp;
1491        walk_stackframe(&fr, callchain_trace, entry);
1492}
1493
1494unsigned long perf_instruction_pointer(struct pt_regs *regs)
1495{
1496        /* However, NDS32 does not support virtualization */
1497        if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
1498                return perf_guest_cbs->get_guest_ip();
1499
1500        return instruction_pointer(regs);
1501}
1502
1503unsigned long perf_misc_flags(struct pt_regs *regs)
1504{
1505        int misc = 0;
1506
1507        /* However, NDS32 does not support virtualization */
1508        if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
1509                if (perf_guest_cbs->is_user_mode())
1510                        misc |= PERF_RECORD_MISC_GUEST_USER;
1511                else
1512                        misc |= PERF_RECORD_MISC_GUEST_KERNEL;
1513        } else {
1514                if (user_mode(regs))
1515                        misc |= PERF_RECORD_MISC_USER;
1516                else
1517                        misc |= PERF_RECORD_MISC_KERNEL;
1518        }
1519
1520        return misc;
1521}
1522