linux/arch/powerpc/oprofile/cell/spu_task_sync.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Cell Broadband Engine OProfile Support
 *
 * (C) Copyright IBM Corporation 2006
 *
 * Author: Maynard Johnson <maynardj@us.ibm.com>
 */

/* The purpose of this file is to handle SPU event task switching
 * and to record SPU context information into the OProfile
 * event buffer.
 *
 * Additionally, the spu_sync_buffer function is provided as a helper
 * for recording actual SPU program counter samples to the event buffer.
 */
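
/* Overview of the data flow implemented below:  SPU program counter
 * samples and context-switch records are first written into small per-SPU
 * circular buffers (spu_buff_add).  A delayed workqueue item
 * (wq_sync_spu_buff) periodically drains those buffers into the global
 * OProfile event buffer via oprofile_put_buff(), with a final drain done
 * in spu_sync_stop().  The context-switch records allow the userspace
 * post-processor to map SPU program counters back to the SPU ELF images.
 */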
#include <linux/dcookies.h>
#include <linux/kref.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/numa.h>
#include <linux/oprofile.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include "pr_util.h"

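/* Sentinel value passed to release_cached_info() to release the cached
 * context information for every SPU instead of a single one.
 */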
#define RELEASE_ALL 9999

static DEFINE_SPINLOCK(buffer_lock);
static DEFINE_SPINLOCK(cache_lock);
static int num_spu_nodes;
static int spu_prof_num_nodes;

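/* Per-SPU circular sample buffers, the delayed work item that drains them,
 * and the size (in entries) of each buffer.  buffer_lock protects the
 * head/tail indices; cache_lock protects the spu_info cache below.
 */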
struct spu_buffer spu_buff[MAX_NUMNODES * SPUS_PER_NODE];
struct delayed_work spu_work;
static unsigned max_spu_buff;

static void spu_buff_add(unsigned long int value, int spu)
{
        /* spu_buff is a circular buffer.  Entries are added at the
         * head, which is the index where the next value is stored.
         * One entry is always kept free so that head and tail are
         * never equal when the buffer is full; that way a full
         * buffer can be distinguished from an empty one.
         *
         * ASSUMPTION: buffer_lock is held when this function is
         *             called, protecting the buffer, head and tail.
         */
        int full = 1;

        if (spu_buff[spu].head >= spu_buff[spu].tail) {
                if ((spu_buff[spu].head - spu_buff[spu].tail)
                    < (max_spu_buff - 1))
                        full = 0;
        } else if (spu_buff[spu].tail > spu_buff[spu].head) {
                if ((spu_buff[spu].tail - spu_buff[spu].head) > 1)
                        full = 0;
        }

        if (!full) {
                spu_buff[spu].buff[spu_buff[spu].head] = value;
                spu_buff[spu].head++;

                if (spu_buff[spu].head >= max_spu_buff)
                        spu_buff[spu].head = 0;
        } else {
                /* From the user's perspective, make the SPU buffer
                 * size management/overflow look like we are using
                 * per-cpu buffers; the user adjusts the SPU buffer
                 * size with the same per-cpu parameter.  Increment
                 * sample_lost_overflow to inform the user that the
                 * buffer size needs to be increased.
                 */
                oprofile_cpu_buffer_inc_smpl_lost();
        }
}
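
/* A small worked example of the "keep one slot free" rule above, assuming
 * max_spu_buff = 4 for illustration: the buffer then holds at most three
 * entries.  With tail = 0, head may advance to 1, 2 and 3, but a fourth
 * add would wrap head back to 0 and make it equal to tail again, which is
 * indistinguishable from an empty buffer, so that sample is dropped and
 * counted as lost instead.
 */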

/* This function copies the per-SPU buffers to the
 * OProfile kernel buffer.
 */
static void sync_spu_buff(void)
{
        int spu;
        unsigned long flags;
        int curr_head;

        for (spu = 0; spu < num_spu_nodes; spu++) {
                /* If the buffer was not created due to an earlier
                 * allocation failure, skip it.
                 */
                if (spu_buff[spu].buff == NULL)
                        continue;

                /* Hold the lock to make sure the head/tail
                 * doesn't change while spu_buff_add() is
                 * deciding if the buffer is full or not.
                 * Being a little paranoid.
                 */
                spin_lock_irqsave(&buffer_lock, flags);
                curr_head = spu_buff[spu].head;
                spin_unlock_irqrestore(&buffer_lock, flags);

                /* Transfer the current contents to the kernel buffer.
                 * Data can still be added to the head of the buffer.
                 */
                oprofile_put_buff(spu_buff[spu].buff,
                                  spu_buff[spu].tail,
                                  curr_head, max_spu_buff);

                spin_lock_irqsave(&buffer_lock, flags);
                spu_buff[spu].tail = curr_head;
                spin_unlock_irqrestore(&buffer_lock, flags);
        }
}

static void wq_sync_spu_buff(struct work_struct *work)
{
        /* Move data from the per-SPU buffers to the kernel buffer. */
        sync_spu_buff();

        /* Only reschedule if profiling is not done. */
        if (spu_prof_running)
                schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
}

/* Container for caching information about an active SPU task. */
struct cached_info {
        struct vma_to_fileoffset_map *map;
        struct spu *the_spu;    /* needed to access pointer to local_store */
        struct kref cache_ref;
};

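/* Cached context information for each possible SPU, indexed by spu->number
 * (a system-wide SPU number).  Entries are protected by cache_lock.
 */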
static struct cached_info *spu_info[MAX_NUMNODES * SPUS_PER_NODE];

static void destroy_cached_info(struct kref *kref)
{
        struct cached_info *info;

        info = container_of(kref, struct cached_info, cache_ref);
        vma_map_free(info->map);
        kfree(info);
        module_put(THIS_MODULE);
}

/* Return the cached_info for the passed SPU number.
 * ATTENTION:  Callers are responsible for obtaining the
 *             cache_lock if needed prior to invoking this function.
 */
static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num)
{
        struct kref *ref;
        struct cached_info *ret_info;

        if (spu_num >= num_spu_nodes) {
                printk(KERN_ERR "SPU_PROF: "
                       "%s, line %d: Invalid index %d into spu info cache\n",
                       __func__, __LINE__, spu_num);
                ret_info = NULL;
                goto out;
        }
        if (!spu_info[spu_num] && the_spu) {
                ref = spu_get_profile_private_kref(the_spu->ctx);
                if (ref) {
                        spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref);
                        kref_get(&spu_info[spu_num]->cache_ref);
                }
        }

        ret_info = spu_info[spu_num];
 out:
        return ret_info;
}

/* Look for cached info for the passed SPU.  If not found, the
 * cached info is created for the passed SPU.
 * Returns 0 for success; otherwise, a negative errno value.
 */
static int
prepare_cached_spu_info(struct spu *spu, unsigned long objectId)
{
        unsigned long flags;
        struct vma_to_fileoffset_map *new_map;
        int retval = 0;
        struct cached_info *info;

        /* We won't bother taking cache_lock here since we don't do
         * anything with the cached_info that is returned.
         */
        info = get_cached_info(spu, spu->number);

        if (info) {
                pr_debug("Found cached SPU info.\n");
                goto out;
        }

        /* Create cached_info and set spu_info[spu->number] to point to it.
         * spu->number is a system-wide value, not a per-node value.
         */
        info = kzalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
                printk(KERN_ERR "SPU_PROF: "
                       "%s, line %d: failed to allocate cached_info\n",
                       __func__, __LINE__);
                retval = -ENOMEM;
                goto err_alloc;
        }
        new_map = create_vma_map(spu, objectId);
        if (!new_map) {
                printk(KERN_ERR "SPU_PROF: "
                       "%s, line %d: create vma_map failed\n",
                       __func__, __LINE__);
                retval = -ENOMEM;
                goto err_alloc;
        }

        pr_debug("Created vma_map\n");
        info->map = new_map;
        info->the_spu = spu;
        kref_init(&info->cache_ref);
        spin_lock_irqsave(&cache_lock, flags);
        spu_info[spu->number] = info;
        /* Increment the count before passing off the reference to SPUFS,
         * so the spu_info[] cache keeps a reference of its own.
         */
        kref_get(&info->cache_ref);

        /* We increment the module refcount here since SPUFS is
         * responsible for the final destruction of the cached_info,
         * and it must be able to access the destroy_cached_info()
         * function defined in the OProfile module.  We decrement
         * the module refcount in destroy_cached_info.
         */
        try_module_get(THIS_MODULE);
        spu_set_profile_private_kref(spu->ctx, &info->cache_ref,
                                destroy_cached_info);
        spin_unlock_irqrestore(&cache_lock, flags);
        goto out;

err_alloc:
        kfree(info);
out:
        return retval;
}

/*
 * NOTE:  The caller is responsible for locking the
 *        cache_lock prior to calling this function.
 */
static int release_cached_info(int spu_index)
{
        int index, end;

        if (spu_index == RELEASE_ALL) {
                end = num_spu_nodes;
                index = 0;
        } else {
                if (spu_index >= num_spu_nodes) {
                        printk(KERN_ERR "SPU_PROF: "
                                "%s, line %d: "
                                "Invalid index %d into spu info cache\n",
                                __func__, __LINE__, spu_index);
                        goto out;
                }
                end = spu_index + 1;
                index = spu_index;
        }
        for (; index < end; index++) {
                if (spu_info[index]) {
                        kref_put(&spu_info[index]->cache_ref,
                                 destroy_cached_info);
                        spu_info[index] = NULL;
                }
        }

out:
        return 0;
}

/* The source code for fast_get_dcookie was "borrowed"
 * from drivers/oprofile/buffer_sync.c.
 */

/* Optimisation. We can manage without taking the dcookie sem
 * because we cannot reach this code without at least one
 * dcookie user still being registered (namely, the reader
 * of the event buffer).
 */
static inline unsigned long fast_get_dcookie(const struct path *path)
{
        unsigned long cookie;

        if (path->dentry->d_flags & DCACHE_COOKIE)
                return (unsigned long)path->dentry;
        get_dcookie(path, &cookie);
        return cookie;
}

/* Look up the dcookie for the task's mm->exe_file,
 * which corresponds loosely to "application name".  Also, determine
 * the offset for the SPU ELF object.  If the computed offset is
 * non-zero, it implies an embedded SPU object; otherwise, it's a
 * separate SPU binary, in which case we retrieve its dcookie.
 * For the embedded case, we must determine if the SPU ELF is embedded
 * in the executable application or another file (i.e., a shared lib).
 * If embedded in a shared lib, we must get the dcookie and return
 * that to the caller.
 */
static unsigned long
get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
                            unsigned long *spu_bin_dcookie,
                            unsigned long spu_ref)
{
        unsigned long app_cookie = 0;
        unsigned int my_offset = 0;
        struct vm_area_struct *vma;
        struct file *exe_file;
        struct mm_struct *mm = spu->mm;

        if (!mm)
                goto out;

        exe_file = get_mm_exe_file(mm);
        if (exe_file) {
                app_cookie = fast_get_dcookie(&exe_file->f_path);
                pr_debug("got dcookie for %pD\n", exe_file);
                fput(exe_file);
        }

        down_read(&mm->mmap_sem);
        for (vma = mm->mmap; vma; vma = vma->vm_next) {
                if (vma->vm_start > spu_ref || vma->vm_end <= spu_ref)
                        continue;
                my_offset = spu_ref - vma->vm_start;
                if (!vma->vm_file)
                        goto fail_no_image_cookie;

                pr_debug("Found spu ELF at %X(object-id:%lx) for file %pD\n",
                         my_offset, spu_ref, vma->vm_file);
                *offsetp = my_offset;
                break;
        }

        /* No mapping contained spu_ref; bail out rather than
         * dereference a NULL vma below.
         */
        if (!vma)
                goto fail_no_image_cookie;

        *spu_bin_dcookie = fast_get_dcookie(&vma->vm_file->f_path);
        pr_debug("got dcookie for %pD\n", vma->vm_file);

        up_read(&mm->mmap_sem);

out:
        return app_cookie;

fail_no_image_cookie:
        up_read(&mm->mmap_sem);

        printk(KERN_ERR "SPU_PROF: "
                "%s, line %d: Cannot find dcookie for SPU binary\n",
                __func__, __LINE__);
        goto out;
}

/* Find or create cached context information for the passed SPU and
 * record the SPU context information into the OProfile event buffer.
 */
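/* The context-switch record written below consists of, in order:
 * ESCAPE_CODE, SPU_CTX_SWITCH_CODE, the SPU number, the pid and tgid of
 * the owning task, the dcookie of the application executable, the dcookie
 * of the file holding the SPU image, and the offset of the SPU image
 * within that file.
 */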
static int process_context_switch(struct spu *spu, unsigned long objectId)
{
        unsigned long flags;
        int retval;
        unsigned int offset = 0;
        unsigned long spu_cookie = 0, app_dcookie;

        retval = prepare_cached_spu_info(spu, objectId);
        if (retval)
                goto out;

        /* Get the dcookie first because a mutex_lock is taken in that
         * code path, so interrupts must not be disabled.
         */
        app_dcookie = get_exec_dcookie_and_offset(spu, &offset, &spu_cookie, objectId);
        if (!app_dcookie || !spu_cookie) {
                retval = -ENOENT;
                goto out;
        }

        /* Record context info in event buffer */
        spin_lock_irqsave(&buffer_lock, flags);
        spu_buff_add(ESCAPE_CODE, spu->number);
        spu_buff_add(SPU_CTX_SWITCH_CODE, spu->number);
        spu_buff_add(spu->number, spu->number);
        spu_buff_add(spu->pid, spu->number);
        spu_buff_add(spu->tgid, spu->number);
        spu_buff_add(app_dcookie, spu->number);
        spu_buff_add(spu_cookie, spu->number);
        spu_buff_add(offset, spu->number);

        /* Set flag to indicate SPU PC data can now be written out.  If
         * the SPU program counter data is seen before an SPU context
         * record is seen, the postprocessing will fail.
         */
        spu_buff[spu->number].ctx_sw_seen = 1;

        spin_unlock_irqrestore(&buffer_lock, flags);
        smp_wmb();      /* ensure SPU event buffer updates are written out;
                         * we don't want entries intermingled */
out:
        return retval;
}

/*
 * This function is invoked on either a bind_context or unbind_context.
 * If called for an unbind_context, the val arg is 0; otherwise,
 * it is the object-id value for the spu context.
 * The data arg is of type 'struct spu *'.
 */
static int spu_active_notify(struct notifier_block *self, unsigned long val,
                                void *data)
{
        int retval;
        unsigned long flags;
        struct spu *the_spu = data;

        pr_debug("SPU event notification arrived\n");
        if (!val) {
                spin_lock_irqsave(&cache_lock, flags);
                retval = release_cached_info(the_spu->number);
                spin_unlock_irqrestore(&cache_lock, flags);
        } else {
                retval = process_context_switch(the_spu, val);
        }
        return retval;
}

static struct notifier_block spu_active = {
        .notifier_call = spu_active_notify,
};

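/* Count the number of Cell nodes that have at least one online CPU.
 * This relies on the node numbers returned by cbe_cpu_to_node() being
 * contiguous starting at 0, so the highest node number plus one equals
 * the node count.
 */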
static int number_of_online_nodes(void)
{
        u32 cpu;
        u32 tmp;
        int nodes = 0;

        for_each_online_cpu(cpu) {
                tmp = cbe_cpu_to_node(cpu) + 1;
                if (tmp > nodes)
                        nodes++;
        }
        return nodes;
}

static int oprofile_spu_buff_create(void)
{
        int spu;

        max_spu_buff = oprofile_get_cpu_buffer_size();

        for (spu = 0; spu < num_spu_nodes; spu++) {
                /* Create circular buffers to store the data in.
                 * Use locks to manage accessing the buffers.
                 */
                spu_buff[spu].head = 0;
                spu_buff[spu].tail = 0;

                /*
                 * Create a buffer for each SPU.  We can't reliably
                 * create a single buffer for all SPUs because a large
                 * enough contiguous kernel allocation may not be
                 * available.
                 */

                spu_buff[spu].buff = kzalloc((max_spu_buff
                                              * sizeof(unsigned long)),
                                             GFP_KERNEL);

                if (!spu_buff[spu].buff) {
                        printk(KERN_ERR "SPU_PROF: "
                               "%s, line %d: oprofile_spu_buff_create "
                               "failed to allocate spu buffer %d.\n",
                               __func__, __LINE__, spu);

                        /* Release the SPU buffers that have been
                         * allocated so far.
                         */
                        while (spu >= 0) {
                                kfree(spu_buff[spu].buff);
                                spu_buff[spu].buff = NULL;
                                spu--;
                        }
                        return -ENOMEM;
                }
        }
        return 0;
}

/* The main purpose of this function is to synchronize
 * OProfile with SPUFS by registering to be notified of
 * SPU task switches.
 *
 * NOTE: When profiling SPUs, we must ensure that only
 * spu_sync_start is invoked and not the generic sync_start
 * in drivers/oprofile/oprof.c.  A return value of
 * SKIP_GENERIC_SYNC or SYNC_START_ERROR will
 * accomplish this.
 */
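/* Each per-SPU buffer is seeded below with a small header record
 * (ESCAPE_CODE, SPU_PROFILING_CODE, num_spu_nodes), which lets the
 * userspace post-processor identify SPU profiling data and the number
 * of SPUs being profiled.
 */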
int spu_sync_start(void)
{
        int spu;
        int ret = SKIP_GENERIC_SYNC;
        int register_ret;
        unsigned long flags = 0;

        spu_prof_num_nodes = number_of_online_nodes();
        num_spu_nodes = spu_prof_num_nodes * SPUS_PER_NODE;
        INIT_DELAYED_WORK(&spu_work, wq_sync_spu_buff);

        /* Create the per-SPU buffers for storing the data to put into
         * the kernel buffer.
         */
        ret = oprofile_spu_buff_create();
        if (ret)
                goto out;

        spin_lock_irqsave(&buffer_lock, flags);
        for (spu = 0; spu < num_spu_nodes; spu++) {
                spu_buff_add(ESCAPE_CODE, spu);
                spu_buff_add(SPU_PROFILING_CODE, spu);
                spu_buff_add(num_spu_nodes, spu);
        }
        spin_unlock_irqrestore(&buffer_lock, flags);

        for (spu = 0; spu < num_spu_nodes; spu++) {
                spu_buff[spu].ctx_sw_seen = 0;
                spu_buff[spu].last_guard_val = 0;
        }

        /* Register for SPU events */
        register_ret = spu_switch_event_register(&spu_active);
        if (register_ret) {
                ret = SYNC_START_ERROR;
                goto out;
        }

        pr_debug("spu_sync_start -- running.\n");
out:
        return ret;
}

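/* Note on usage by the SPU profiling support in this directory:
 * spu_sync_start() is called when profiling starts, spu_sync_buffer() is
 * then called repeatedly with batches of SPU program counter samples, and
 * spu_sync_stop() performs the final flush and cleanup when profiling ends.
 */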
/* Record SPU program counter samples into the oprofile event buffer. */
void spu_sync_buffer(int spu_num, unsigned int *samples,
                     int num_samples)
{
        unsigned long long file_offset;
        unsigned long flags;
        int i;
        struct vma_to_fileoffset_map *map;
        struct spu *the_spu;
        unsigned long long spu_num_ll = spu_num;
        unsigned long long spu_num_shifted = spu_num_ll << 32;
        struct cached_info *c_info;

        /* We need to obtain the cache_lock here because it's
         * possible that, after getting the cached_info, the SPU job
         * corresponding to this cached_info may end, resulting
         * in the destruction of the cached_info.
         */
        spin_lock_irqsave(&cache_lock, flags);
        c_info = get_cached_info(NULL, spu_num);
        if (!c_info) {
                /* This legitimately happens when the SPU task ends before
                 * all samples are recorded.
                 * No big deal -- we just drop a few samples.
                 */
                pr_debug("SPU_PROF: No cached SPU context "
                          "for SPU #%d. Dropping samples.\n", spu_num);
                goto out;
        }

        map = c_info->map;
        the_spu = c_info->the_spu;
        spin_lock(&buffer_lock);
        for (i = 0; i < num_samples; i++) {
                unsigned int sample = *(samples + i);
                int grd_val = 0;

                file_offset = 0;
                if (sample == 0)
                        continue;
                file_offset = vma_map_lookup(map, sample, the_spu, &grd_val);

                /* If overlays are used by this SPU application, the guard
                 * value is non-zero, indicating which overlay section is in
                 * use.  We need to discard samples taken during the period
                 * in which an overlay switch occurs (i.e., the guard value
                 * changes).
                 */
                if (grd_val && grd_val != spu_buff[spu_num].last_guard_val) {
                        spu_buff[spu_num].last_guard_val = grd_val;
                        /* Drop the rest of the samples. */
                        break;
                }

                /* We must ensure that the SPU context switch has been written
                 * out before samples for the SPU.  Otherwise, the SPU context
                 * information is not available and the postprocessing of the
                 * SPU PC will fail with no available anonymous map information.
                 */
                if (spu_buff[spu_num].ctx_sw_seen)
                        spu_buff_add((file_offset | spu_num_shifted),
                                         spu_num);
        }
        spin_unlock(&buffer_lock);
out:
        spin_unlock_irqrestore(&cache_lock, flags);
}

int spu_sync_stop(void)
{
        unsigned long flags = 0;
        int ret;
        int k;

        ret = spu_switch_event_unregister(&spu_active);

        if (ret)
                printk(KERN_ERR "SPU_PROF: "
                       "%s, line %d: spu_switch_event_unregister "
                       "returned %d\n",
                       __func__, __LINE__, ret);

        /* Flush any remaining data in the per-SPU buffers. */
        sync_spu_buff();

        spin_lock_irqsave(&cache_lock, flags);
        ret = release_cached_info(RELEASE_ALL);
        spin_unlock_irqrestore(&cache_lock, flags);

        /* Remove the scheduled work queue item rather than waiting
         * for every queued entry to execute.  Then flush the pending
         * system wide buffer to the event buffer.
         */
        cancel_delayed_work(&spu_work);

        for (k = 0; k < num_spu_nodes; k++) {
                spu_buff[k].ctx_sw_seen = 0;

                /*
                 * spu_buff[k].buff will be NULL if there was a problem
                 * allocating the buffer.  kfree() handles NULL, so simply
                 * free and clear each entry.
                 */
                kfree(spu_buff[k].buff);
                spu_buff[k].buff = NULL;
        }
        pr_debug("spu_sync_stop -- done.\n");
        return ret;
}