linux/arch/x86/kernel/cpu/perf_event_intel_bts.c
/*
 * BTS PMU driver for perf
 * Copyright (c) 2013-2014, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#undef DEBUG

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
#include <linux/device.h>
#include <linux/coredump.h>

#include <asm-generic/sizes.h>
#include <asm/perf_event.h>

#include "perf_event.h"

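/*
 * BTS (Branch Trace Store) logs a record for each taken branch into a
 * memory-resident buffer described by the DS (Debug Store) save area; the
 * record and DS layouts are specified in the Intel SDM.  This driver
 * exposes the facility as a standalone perf PMU that streams the records
 * into a perf AUX buffer.  bts_ctx is the per-cpu glue between the
 * hardware and the event that currently owns it; ds_back keeps a copy of
 * the DS fields we clobber, so they can be restored when the event is
 * removed.
 */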
struct bts_ctx {
        struct perf_output_handle       handle;
        struct debug_store              ds_back;
        int                             started;
};

static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);

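/*
 * In the 64-bit DS format, a BTS record is three 8-byte fields (branch
 * from, branch to, flags), hence 24 bytes.  The safety margin is the slack
 * kept between the interrupt threshold and the absolute maximum, so the
 * hardware has room to keep logging while the threshold PMI is being
 * delivered.
 */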
#define BTS_RECORD_SIZE         24
#define BTS_SAFETY_MARGIN       4080

struct bts_phys {
        struct page     *page;
        unsigned long   size;
        unsigned long   offset;
        unsigned long   displacement;
};

struct bts_buffer {
        size_t          real_size;      /* multiple of BTS_RECORD_SIZE */
        unsigned int    nr_pages;
        unsigned int    nr_bufs;
        unsigned int    cur_buf;
        bool            snapshot;
        local_t         data_size;
        local_t         lost;
        local_t         head;
        unsigned long   end;
        void            **data_pages;
        struct bts_phys buf[0];
};

struct pmu bts_pmu;

void intel_pmu_enable_bts(u64 config);
void intel_pmu_disable_bts(void);

static size_t buf_size(struct page *page)
{
        return 1 << (PAGE_SHIFT + page_private(page));
}

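/*
 * Set up the AUX buffer for BTS.  The hardware wants physically contiguous
 * memory, so each high-order allocation in the page array (marked by
 * PagePrivate(), with the order in page_private()) becomes one bts_phys
 * chunk.  Chunk sizes are trimmed to a multiple of BTS_RECORD_SIZE, and
 * each chunk's displacement skips enough of its start to keep the record
 * stream aligned across the padding left at the end of the previous chunk.
 */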
static void *
bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
{
        struct bts_buffer *buf;
        struct page *page;
        int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
        unsigned long offset;
        size_t size = nr_pages << PAGE_SHIFT;
        int pg, nbuf, pad;

        /* count all the high order buffers */
        for (pg = 0, nbuf = 0; pg < nr_pages;) {
                page = virt_to_page(pages[pg]);
                if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
                        return NULL;
                pg += 1 << page_private(page);
                nbuf++;
        }

        /*
         * to avoid interrupts in overwrite mode, only allow one physical
         * buffer
         */
        if (overwrite && nbuf > 1)
                return NULL;

        buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
        if (!buf)
                return NULL;

        buf->nr_pages = nr_pages;
        buf->nr_bufs = nbuf;
        buf->snapshot = overwrite;
        buf->data_pages = pages;
        buf->real_size = size - size % BTS_RECORD_SIZE;

        for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
                unsigned int __nr_pages;

                page = virt_to_page(pages[pg]);
                __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
                buf->buf[nbuf].page = page;
                buf->buf[nbuf].offset = offset;
                buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
                buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
                pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
                buf->buf[nbuf].size -= pad;

                pg += __nr_pages;
                offset += __nr_pages << PAGE_SHIFT;
        }

        return buf;
}

static void bts_buffer_free_aux(void *data)
{
        kfree(data);
}

static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
{
        return buf->buf[idx].offset + buf->buf[idx].displacement;
}

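/*
 * Program the DS area for the current physical chunk.  In non-snapshot
 * mode, the interrupt threshold is placed BTS_SAFETY_MARGIN bytes before
 * the end when there is room, one record before the end otherwise; in
 * snapshot mode it is pushed past the absolute maximum so that the
 * threshold PMI never fires and the hardware wraps on its own.
 */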
static void
bts_config_buffer(struct bts_buffer *buf)
{
        int cpu = raw_smp_processor_id();
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
        struct bts_phys *phys = &buf->buf[buf->cur_buf];
        unsigned long index, thresh = 0, end = phys->size;
        struct page *page = phys->page;

        index = local_read(&buf->head);

        if (!buf->snapshot) {
                if (buf->end < phys->offset + buf_size(page))
                        end = buf->end - phys->offset - phys->displacement;

                index -= phys->offset + phys->displacement;

                if (end - index > BTS_SAFETY_MARGIN)
                        thresh = end - BTS_SAFETY_MARGIN;
                else if (end - index > BTS_RECORD_SIZE)
                        thresh = end - BTS_RECORD_SIZE;
                else
                        thresh = end;
        }

        ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
        ds->bts_index = ds->bts_buffer_base + index;
        ds->bts_absolute_maximum = ds->bts_buffer_base + end;
        ds->bts_interrupt_threshold = !buf->snapshot
                ? ds->bts_buffer_base + thresh
                : ds->bts_absolute_maximum + BTS_RECORD_SIZE;
}

static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
{
        unsigned long index = head - phys->offset;

        memset(page_address(phys->page) + index, 0, phys->size - index);
}

static bool bts_buffer_is_full(struct bts_buffer *buf, struct bts_ctx *bts)
{
        if (buf->snapshot)
                return false;

        if (local_read(&buf->data_size) >= bts->handle.size ||
            bts->handle.size - local_read(&buf->data_size) < BTS_RECORD_SIZE)
                return true;

        return false;
}

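/*
 * Fold the hardware write pointer (ds->bts_index) back into the software
 * head and data_size counters.  In non-snapshot mode, an index at or past
 * the absolute maximum means the hardware ran out of room and records may
 * have been dropped, which is reported through buf->lost.
 */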
static void bts_update(struct bts_ctx *bts)
{
        int cpu = raw_smp_processor_id();
        struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
        struct bts_buffer *buf = perf_get_aux(&bts->handle);
        unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;

        if (!buf)
                return;

        head = index + bts_buffer_offset(buf, buf->cur_buf);
        old = local_xchg(&buf->head, head);

        if (!buf->snapshot) {
                if (old == head)
                        return;

                if (ds->bts_index >= ds->bts_absolute_maximum)
                        local_inc(&buf->lost);

                /*
                 * old and head are always in the same physical buffer, so we
                 * can subtract them to get the data size.
                 */
                local_add(head - old, &buf->data_size);
        } else {
                local_set(&buf->data_size, head);
        }
}

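/*
 * (Re)arm tracing: derive the enable bits from the event's exclude_*
 * attributes (with the threshold interrupt only in non-snapshot mode),
 * program the DS area, and order those stores before flipping BTS on.
 */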
static void __bts_event_start(struct perf_event *event)
{
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
        struct bts_buffer *buf = perf_get_aux(&bts->handle);
        u64 config = 0;

        if (!buf || bts_buffer_is_full(buf, bts))
                return;

        event->hw.state = 0;

        if (!buf->snapshot)
                config |= ARCH_PERFMON_EVENTSEL_INT;
        if (!event->attr.exclude_kernel)
                config |= ARCH_PERFMON_EVENTSEL_OS;
        if (!event->attr.exclude_user)
                config |= ARCH_PERFMON_EVENTSEL_USR;

        bts_config_buffer(buf);

        /*
         * local barrier to make sure that ds configuration made it
         * before we enable BTS
         */
        wmb();

        intel_pmu_enable_bts(config);
}

static void bts_event_start(struct perf_event *event, int flags)
{
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);

        __bts_event_start(event);

        /* PMI handler: this counter is running and likely generating PMIs */
        ACCESS_ONCE(bts->started) = 1;
}

static void __bts_event_stop(struct perf_event *event)
{
        /*
         * No extra synchronization is mandated by the documentation to have
         * BTS data stores globally visible.
         */
        intel_pmu_disable_bts();

        if (event->hw.state & PERF_HES_STOPPED)
                return;

        ACCESS_ONCE(event->hw.state) |= PERF_HES_STOPPED;
}

static void bts_event_stop(struct perf_event *event, int flags)
{
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);

        /* PMI handler: don't restart this counter */
        ACCESS_ONCE(bts->started) = 0;

        __bts_event_stop(event);

        if (flags & PERF_EF_UPDATE)
                bts_update(bts);
}

void intel_bts_enable_local(void)
{
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);

        if (bts->handle.event && bts->started)
                __bts_event_start(bts->handle.event);
}

void intel_bts_disable_local(void)
{
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);

        if (bts->handle.event)
                __bts_event_stop(bts->handle.event);
}

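/*
 * Decide where the next stretch of tracing lands in the AUX buffer.  When
 * the room left in the current physical chunk is down to the safety margin,
 * try the next chunk: pad the remainder with zeroes, account the pad plus
 * the inter-chunk gap as skipped AUX space, and move head to the start of
 * the new chunk.  The usable space is also clamped so writing does not run
 * far past the wakeup watermark; buf->end, consumed by bts_config_buffer(),
 * is what ultimately caps the hardware.
 */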
static int
bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
{
        unsigned long head, space, next_space, pad, gap, skip, wakeup;
        unsigned int next_buf;
        struct bts_phys *phys, *next_phys;
        int ret;

        if (buf->snapshot)
                return 0;

        head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
        if (WARN_ON_ONCE(head != local_read(&buf->head)))
                return -EINVAL;

        phys = &buf->buf[buf->cur_buf];
        space = phys->offset + phys->displacement + phys->size - head;
        pad = space;
        if (space > handle->size) {
                space = handle->size;
                space -= space % BTS_RECORD_SIZE;
        }
        if (space <= BTS_SAFETY_MARGIN) {
                /* See if next phys buffer has more space */
                next_buf = buf->cur_buf + 1;
                if (next_buf >= buf->nr_bufs)
                        next_buf = 0;
                next_phys = &buf->buf[next_buf];
                gap = buf_size(phys->page) - phys->displacement - phys->size +
                      next_phys->displacement;
                skip = pad + gap;
                if (handle->size >= skip) {
                        next_space = next_phys->size;
                        if (next_space + skip > handle->size) {
                                next_space = handle->size - skip;
                                next_space -= next_space % BTS_RECORD_SIZE;
                        }
                        if (next_space > space || !space) {
                                if (pad)
                                        bts_buffer_pad_out(phys, head);
                                ret = perf_aux_output_skip(handle, skip);
                                if (ret)
                                        return ret;
                                /* Advance to next phys buffer */
                                phys = next_phys;
                                space = next_space;
                                head = phys->offset + phys->displacement;
                                /*
                                 * After this, cur_buf and head won't match ds
                                 * anymore, so we must not be racing with
                                 * bts_update().
                                 */
                                buf->cur_buf = next_buf;
                                local_set(&buf->head, head);
                        }
                }
        }

        /* Don't go far beyond wakeup watermark */
        wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
                 handle->head;
        if (space > wakeup) {
                space = wakeup;
                space -= space % BTS_RECORD_SIZE;
        }

        buf->end = head + space;

        /*
         * If we have no space, the lost notification would have been sent when
         * we hit absolute_maximum - see bts_update()
         */
        if (!space)
                return -ENOSPC;

        return 0;
}

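/*
 * PMI handler, called from the x86 perf interrupt path: fold in the new
 * hardware head, close the current AUX transaction with the data gathered
 * so far, then open a new one and re-point the hardware at fresh space.
 * Returns nonzero when it consumed the interrupt.
 */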
int intel_bts_interrupt(void)
{
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
        struct perf_event *event = bts->handle.event;
        struct bts_buffer *buf;
        s64 old_head;
        int err;

        if (!event || !bts->started)
                return 0;

        buf = perf_get_aux(&bts->handle);
        /*
         * Skip snapshot counters: they don't use the interrupt, but
         * there's no other way of telling, because the pointer will
         * keep moving
         */
        if (!buf || buf->snapshot)
                return 0;

        old_head = local_read(&buf->head);
        bts_update(bts);

        /* no new data */
        if (old_head == local_read(&buf->head))
                return 0;

        perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
                            !!local_xchg(&buf->lost, 0));

        buf = perf_aux_output_begin(&bts->handle, event);
        if (!buf)
                return 1;

        err = bts_buffer_reset(buf, &bts->handle);
        if (err)
                perf_aux_output_end(&bts->handle, 0, false);

        return 1;
}

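/*
 * ->del(): stop the hardware, flush what is left into the AUX buffer (in
 * snapshot mode the full buffer size is reported, since the most recent
 * data can be anywhere in it), and restore the DS fields saved at ->add()
 * time.
 */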
static void bts_event_del(struct perf_event *event, int mode)
{
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
        struct bts_buffer *buf = perf_get_aux(&bts->handle);

        bts_event_stop(event, PERF_EF_UPDATE);

        if (buf) {
                if (buf->snapshot)
                        bts->handle.head =
                                local_xchg(&buf->data_size,
                                           buf->nr_pages << PAGE_SHIFT);
                perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
                                    !!local_xchg(&buf->lost, 0));
        }

        cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
        cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
        cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
        cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
}

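/*
 * ->add(): claim the AUX buffer for this event and stash the DS fields
 * that bts_event_del() will restore.  Only one BTS event can be active
 * per cpu at a time, hence the -EBUSY checks against both the core PMU's
 * use of BTS and the AUX handle.
 */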
static int bts_event_add(struct perf_event *event, int mode)
{
        struct bts_buffer *buf;
        struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int ret = -EBUSY;

        event->hw.state = PERF_HES_STOPPED;

        if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
                return -EBUSY;

        if (bts->handle.event)
                return -EBUSY;

        buf = perf_aux_output_begin(&bts->handle, event);
        if (!buf)
                return -EINVAL;

        ret = bts_buffer_reset(buf, &bts->handle);
        if (ret) {
                perf_aux_output_end(&bts->handle, 0, false);
                return ret;
        }

        bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
        bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
        bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;

        if (mode & PERF_EF_START) {
                bts_event_start(event, 0);
                if (hwc->state & PERF_HES_STOPPED) {
                        bts_event_del(event, 0);
                        return -EBUSY;
                }
        }

        return 0;
}

static void bts_event_destroy(struct perf_event *event)
{
        x86_del_exclusive(x86_lbr_exclusive_bts);
}

static int bts_event_init(struct perf_event *event)
{
        if (event->attr.type != bts_pmu.type)
                return -ENOENT;

        if (x86_add_exclusive(x86_lbr_exclusive_bts))
                return -EBUSY;

        event->destroy = bts_event_destroy;

        return 0;
}

static void bts_event_read(struct perf_event *event)
{
}

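/*
 * DTES64 is required for the 64-bit DS save area format that the record
 * size above assumes.  PERF_PMU_CAP_AUX_NO_SG tells the AUX allocator to
 * use high-order (physically contiguous) pages, which is what
 * bts_buffer_setup_aux() expects.
 */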
static __init int bts_init(void)
{
        if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
                return -ENODEV;

        bts_pmu.capabilities    = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE;
        bts_pmu.task_ctx_nr     = perf_sw_context;
        bts_pmu.event_init      = bts_event_init;
        bts_pmu.add             = bts_event_add;
        bts_pmu.del             = bts_event_del;
        bts_pmu.start           = bts_event_start;
        bts_pmu.stop            = bts_event_stop;
        bts_pmu.read            = bts_event_read;
        bts_pmu.setup_aux       = bts_buffer_setup_aux;
        bts_pmu.free_aux        = bts_buffer_free_aux;

        return perf_pmu_register(&bts_pmu, "intel_bts", -1);
}

module_init(bts_init);

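/*
 * Usage sketch (assuming a perf tool recent enough to know about AUX
 * areas): once this PMU is registered, branch traces can be captured
 * with, e.g.:
 *
 *      perf record -e intel_bts// -- <workload>
 *
 * and the AUX data decoded afterwards with perf report/script.
 */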