/*
 * Cell Broadband Engine OProfile Support
 *
 * (C) Copyright IBM Corporation 2006
 *
 * Author: David Erb (djerb@us.ibm.com)
 * Modifications:
 *	   Carl Love <carll@us.ibm.com>
 *	   Maynard Johnson <maynardj@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/kthread.h>
#include <linux/oprofile.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <asm/cell-pmu.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
#include <asm/io.h>
#include <asm/oprofile_impl.h>
#include <asm/processor.h>
#include <asm/prom.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/rtas.h>
#include <asm/cell-regs.h>

#include "../platforms/cell/interrupt.h"
#include "cell/pr_util.h"

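/*
 * The three supported profiling modes: direct profiling of PPU
 * events/cycles, SPU profiling by cycles, and SPU profiling by events.
 */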
#define PPU_PROFILING 0
#define SPU_PROFILING_CYCLES 1
#define SPU_PROFILING_EVENTS 2

#define SPU_EVENT_NUM_START 4100
#define SPU_EVENT_NUM_STOP 4399
#define SPU_PROFILE_EVENT_ADDR 4363
#define SPU_PROFILE_EVENT_ADDR_MASK_A 0x146
#define SPU_PROFILE_EVENT_ADDR_MASK_B 0x186

#define NUM_SPUS_PER_NODE 8
#define SPU_CYCLES_EVENT_NUM 2		/* event number for SPU_CYCLES */

#define PPU_CYCLES_EVENT_NUM 1		/* event number for CYCLES */
#define PPU_CYCLES_GRP_NUM 1		/* special group number for identifying
					 * PPU_CYCLES event
					 */

#define CBE_COUNT_ALL_CYCLES 0x42800000	/* PPU cycle event specifier */

#define NUM_THREADS 2		/* number of physical threads in
				 * physical processor
				 */

#define NUM_DEBUG_BUS_WORDS 4
#define NUM_INPUT_BUS_WORDS 2

#define MAX_SPU_COUNT 0xFFFFFF	/* maximum 24 bit LFSR value */

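/* Minimum HW interval timer setting to send value to trace buffer is
 * 10 cycles.  To configure the counter to send a value every N cycles,
 * set the counter to 2^32 - 1 - N.
 */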
#define NUM_INTERVAL_CYC (0xFFFFFFFF - 10)

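/*
 * spu_cycle_reset is the number of cycles between samples.
 * This variable is used for SPU profiling and should only be set
 * by the user if it is non-zero.
 */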
static unsigned int spu_cycle_reset;
static unsigned int profiling_mode;
static int spu_evnt_phys_spu_indx;

struct pmc_cntrl_data {
	unsigned long vcntr;
	unsigned long evnts;
	unsigned long masks;
	unsigned long enabled;
};

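/*
 * ibm,cbe-perftools rtas parameters
 */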
struct pm_signal {
	u16 cpu;		/* Processor to modify */
	u16 sub_unit;		/* hw subunit this applies to (if applicable) */
	short int signal_group;	/* Signal Group to Set/Unset */
	u8 bus_word;		/* Enum BUS_WORD_x_BITS -- defines to which
				 * Bus Word(s) (bitmask)
				 */
	u8 bit;			/* Trigger/Event bit (if applicable) */
};

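/*
 * Subfunction and passthru argument values for the ibm,cbe-perftools
 * RTAS call.
 */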
enum {
	SUBFUNC_RESET = 1,
	SUBFUNC_ACTIVATE = 2,
	SUBFUNC_DEACTIVATE = 3,

	PASSTHRU_IGNORE = 0,
	PASSTHRU_ENABLE = 1,
	PASSTHRU_DISABLE = 2,
};

struct pm_cntrl {
	u16 enable;
	u16 stop_at_max;
	u16 trace_mode;
	u16 freeze;
	u16 count_mode;
	u16 spu_addr_trace;
	u8 trace_buf_ovflw;
};

static struct {
	u32 group_control;
	u32 debug_bus_control;
	struct pm_cntrl pm_cntrl;
	u32 pm07_cntrl[NR_PHYS_CTRS];
} pm_regs;

#define GET_SUB_UNIT(x) (((x) & 0x0000f000) >> 12)
#define GET_BUS_WORD(x) (((x) & 0x000000f0) >> 4)
#define GET_BUS_TYPE(x) (((x) & 0x00000300) >> 8)
#define GET_POLARITY(x) (((x) & 0x00000002) >> 1)
#define GET_COUNT_CYCLES(x) ((x) & 0x00000001)
#define GET_INPUT_CONTROL(x) (((x) & 0x00000004) >> 2)

static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
static unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE];
static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
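/*
 * The Cell profiling code makes RTAS calls to set up the debug bus to
 * route the performance signals, and SPU profiling requires a second
 * RTAS call to set up the hardware to capture the SPU PCs.  -EIO is
 * returned if the token lookups or the RTAS calls fail; failures on
 * the stop path are only logged, since OProfile does not support
 * returning errors on stop.
 *
 * Interpretation of hdw_thread:
 * 0 - even virtual cpus 0, 2, 4,...
 * 1 - odd virtual cpus 1, 3, 5, ...
 */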
static u32 hdw_thread;

static u32 virt_cntr_inter_mask;
static struct timer_list timer_virt_cntr;
static struct timer_list timer_spu_event_swap;

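/*
 * pm_signal needs to be global since it is initialized in
 * cell_reg_setup at the time when the necessary information
 * is available.
 */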
static struct pm_signal pm_signal[NR_PHYS_CTRS];
static int pm_rtas_token;
static int spu_rtas_token;

static u32 reset_value[NR_PHYS_CTRS];
static int num_counters;
static int oprofile_running;
static DEFINE_SPINLOCK(cntr_lock);

static u32 ctr_enabled;

static unsigned char input_bus[NUM_INPUT_BUS_WORDS];

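/*
 * Firmware interface functions
 */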
static int
rtas_ibm_cbe_perftools(int subfunc, int passthru,
		       void *address, unsigned long length)
{
	u64 paddr = __pa(address);

	return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc,
			 passthru, paddr >> 32, paddr & 0xffffffff, length);
}

static void pm_rtas_reset_signals(u32 node)
{
	int ret;
	struct pm_signal pm_signal_local;

	/*
	 * The debug bus is being set to the passthru disable state.
	 * However, the FW still expects at least one legal signal routing
	 * entry or it will return an error on the arguments.  If we don't
	 * supply a valid entry, we must ignore all return values.  Ignoring
	 * all return values means we might miss an error we should be
	 * concerned about.
	 */

	/* fw expects physical cpu # */
	pm_signal_local.cpu = node;
	pm_signal_local.signal_group = 21;
	pm_signal_local.bus_word = 1;
	pm_signal_local.sub_unit = 0;
	pm_signal_local.bit = 0;

	ret = rtas_ibm_cbe_perftools(SUBFUNC_RESET, PASSTHRU_DISABLE,
				     &pm_signal_local,
				     sizeof(struct pm_signal));

	if (unlikely(ret))
		/*
		 * Not a fatal error; log the firmware failure and
		 * continue.
		 */
		printk(KERN_WARNING "%s: rtas returned: %d\n",
		       __func__, ret);
}

static int pm_rtas_activate_signals(u32 node, u32 count)
{
	int ret;
	int i, j;
	struct pm_signal pm_signal_local[NR_PHYS_CTRS];

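	/*
	 * There is no debug setup required for the cycles event.
	 * Note that only events in the same group can be used.
	 * Otherwise, there will be conflicts in correctly routing
	 * the signals on the debug bus.  It is the responsibility
	 * of the OProfile user tool to check that the events are in
	 * the same group.
	 */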
	i = 0;
	for (j = 0; j < count; j++) {
		if (pm_signal[j].signal_group != PPU_CYCLES_GRP_NUM) {

			/* fw expects physical cpu # */
			pm_signal_local[i].cpu = node;
			pm_signal_local[i].signal_group
				= pm_signal[j].signal_group;
			pm_signal_local[i].bus_word = pm_signal[j].bus_word;
			pm_signal_local[i].sub_unit = pm_signal[j].sub_unit;
			pm_signal_local[i].bit = pm_signal[j].bit;
			i++;
		}
	}

	if (i != 0) {
		ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE,
					     pm_signal_local,
					     i * sizeof(struct pm_signal));

		if (unlikely(ret)) {
			printk(KERN_WARNING "%s: rtas returned: %d\n",
			       __func__, ret);
			return -EIO;
		}
	}

	return 0;
}
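/*
 * Configure the performance counter control bits and the debug bus
 * signal routing (pm_signal entry) for the given counter/event pair.
 */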
static void set_pm_event(u32 ctr, int event, u32 unit_mask)
{
	struct pm_signal *p;
	u32 signal_bit;
	u32 bus_word, bus_type, count_cycles, polarity, input_control;
	int j, i;

	if (event == PPU_CYCLES_EVENT_NUM) {
		/* Special Event: Cycles for all CPU modes */
		pm_regs.pm07_cntrl[ctr] = CBE_COUNT_ALL_CYCLES;
		p = &(pm_signal[ctr]);
		p->signal_group = PPU_CYCLES_GRP_NUM;
		p->bus_word = 1;
		p->sub_unit = 0;
		p->bit = 0;
		goto out;
	} else {
		pm_regs.pm07_cntrl[ctr] = 0;
	}

	bus_word = GET_BUS_WORD(unit_mask);
	bus_type = GET_BUS_TYPE(unit_mask);
	count_cycles = GET_COUNT_CYCLES(unit_mask);
	polarity = GET_POLARITY(unit_mask);
	input_control = GET_INPUT_CONTROL(unit_mask);
	signal_bit = (event % 100);

	p = &(pm_signal[ctr]);

	p->signal_group = event / 100;
	p->bus_word = bus_word;
	p->sub_unit = GET_SUB_UNIT(unit_mask);

	pm_regs.pm07_cntrl[ctr] = 0;
	pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles);
	pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
	pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);

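	/*
	 * Some of the islands signal selection is based on 64 bit words.
	 * The debug bus words are 32 bits, the input words to the performance
	 * counters are defined as 32 bits.  Need to convert the 64 bit island
	 * signal selection bit and word values to the appropriate 32 bit
	 * values.
	 */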
	if (input_control == 0) {
		if (signal_bit > 31) {
			signal_bit -= 32;
			if (bus_word == 0x3)
				bus_word = 0x2;
			else if (bus_word == 0xc)
				bus_word = 0x8;
		}

		if ((bus_type == 0) && p->signal_group >= 60)
			bus_type = 2;
		if ((bus_type == 1) && p->signal_group >= 50)
			bus_type = 0;

		pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_MUX(signal_bit);
	} else {
		pm_regs.pm07_cntrl[ctr] = 0;
		p->bit = signal_bit;
	}

	for (i = 0; i < NUM_DEBUG_BUS_WORDS; i++) {
		if (bus_word & (1 << i)) {
			pm_regs.debug_bus_control |=
				(bus_type << (30 - (2 * i)));

			for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) {
				if (input_bus[j] == 0xff) {
					input_bus[j] = i;
					pm_regs.group_control |=
						(i << (30 - (2 * j)));

					break;
				}
			}
		}
	}
out:
	;
}

static void write_pm_cntrl(int cpu)
{
	/*
	 * Oprofile will use 32 bit counters, set bits 7:10 to 0.
	 * pm_regs.pm_cntrl is a global.
	 */
	u32 val = 0;

	if (pm_regs.pm_cntrl.enable == 1)
		val |= CBE_PM_ENABLE_PERF_MON;

	if (pm_regs.pm_cntrl.stop_at_max == 1)
		val |= CBE_PM_STOP_AT_MAX;

	if (pm_regs.pm_cntrl.trace_mode != 0)
		val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode);

	if (pm_regs.pm_cntrl.trace_buf_ovflw == 1)
		val |= CBE_PM_TRACE_BUF_OVFLW(pm_regs.pm_cntrl.trace_buf_ovflw);

	if (pm_regs.pm_cntrl.freeze == 1)
		val |= CBE_PM_FREEZE_ALL_CTRS;

	val |= CBE_PM_SPU_ADDR_TRACE_SET(pm_regs.pm_cntrl.spu_addr_trace);

	/*
	 * Routine set_count_mode must be called previously to set
	 * the count mode based on the user selection of user and kernel.
	 */
	val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
	cbe_write_pm(cpu, pm_control, val);
}

static inline void
set_count_mode(u32 kernel, u32 user)
{
	/*
	 * The user must specify user and kernel if they want them. If
	 * neither is specified, OProfile will count in hypervisor mode.
	 * pm_regs.pm_cntrl is a global.
	 */
	if (kernel) {
		if (user)
			pm_regs.pm_cntrl.count_mode = CBE_COUNT_ALL_MODES;
		else
			pm_regs.pm_cntrl.count_mode =
				CBE_COUNT_SUPERVISOR_MODE;
	} else {
		if (user)
			pm_regs.pm_cntrl.count_mode = CBE_COUNT_PROBLEM_MODE;
		else
			pm_regs.pm_cntrl.count_mode =
				CBE_COUNT_HYPERVISOR_MODE;
	}
}

static inline void enable_ctr(u32 cpu, u32 ctr, u32 *pm07_cntrl)
{
	/* Set the enable bit and write the pm07 control register. */
	pm07_cntrl[ctr] |= CBE_PM_CTR_ENABLE;
	cbe_write_pm07_control(cpu, ctr, pm07_cntrl[ctr]);
}
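/*
 * Oprofile is expected to collect data on all CPUs simultaneously.
 * However, there is one set of performance counters per node.  There are
 * two hardware threads or virtual CPUs on each node.  Hence, OProfile must
 * multiplex in time the performance counter collection on the two virtual
 * CPUs.  The multiplexing of the performance counters is done by this
 * virtual counter routine.
 *
 * The pmc_values used below is defined as 'per-cpu' but its use is
 * more akin to 'per-node'.  We need to store two sets of counter
 * values per node -- one for the previous run and one for the next.
 * The per-cpu[NR_PHYS_CTRS] gives us the storage we need.  Each odd/even
 * pair of per-cpu arrays is used for storing the previous and next
 * pmc values for a given node.
 * NOTE: We use the per-cpu variable to improve cache performance.
 *
 * This routine will alternate loading the virtual counters for
 * virtual CPUs.
 */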
static void cell_virtual_cntr(struct timer_list *unused)
{
	int i, prev_hdw_thread, next_hdw_thread;
	u32 cpu;
	unsigned long flags;

	/*
	 * Make sure that the interrupt handler and the virt counter are
	 * not both playing with the counters on the same node.
	 */
	spin_lock_irqsave(&cntr_lock, flags);

	prev_hdw_thread = hdw_thread;

	/* switch the cpu handling the interrupts */
	hdw_thread = 1 ^ hdw_thread;
	next_hdw_thread = hdw_thread;

	pm_regs.group_control = 0;
	pm_regs.debug_bus_control = 0;

	for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
		input_bus[i] = 0xff;

	/*
	 * There are some per thread events.  Must do the
	 * set event for the thread that is being started.
	 */
	for (i = 0; i < num_counters; i++)
		set_pm_event(i,
			     pmc_cntrl[next_hdw_thread][i].evnts,
			     pmc_cntrl[next_hdw_thread][i].masks);

	/*
	 * The following is done only once per each node, but
	 * we need cpu #, not node #, to pass to the cbe_xxx functions.
	 */
	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		/*
		 * Stop both threads for reading.  It is sufficient to
		 * do this for thread 0 since both threads are controlled
		 * by the same performance monitor unit.
		 */
		cbe_disable_pm(cpu);
		cbe_disable_pm_interrupts(cpu);
		for (i = 0; i < num_counters; i++) {
			per_cpu(pmc_values, cpu + prev_hdw_thread)[i]
				= cbe_read_ctr(cpu, i);

			if (per_cpu(pmc_values, cpu + next_hdw_thread)[i]
			    == 0xFFFFFFFF)
				/*
				 * If the cntr value is 0xffffffff, we must
				 * reset that to 0xfffffff0 when the current
				 * thread is restarted.  This will generate a
				 * new interrupt and make sure that we never
				 * restore the counters to the max value.  If
				 * the counters were restored to the max value,
				 * they do not increment and no interrupts are
				 * generated.  Hence no more samples will be
				 * collected on that cpu.
				 */
				cbe_write_ctr(cpu, i, 0xFFFFFFF0);
			else
				cbe_write_ctr(cpu, i,
					      per_cpu(pmc_values,
						      cpu +
						      next_hdw_thread)[i]);
		}

		/*
		 * Switch to the other thread.  Change the interrupt
		 * and control regs to be scheduled on the CPU
		 * corresponding to the thread to execute.
		 */
		for (i = 0; i < num_counters; i++) {
			if (pmc_cntrl[next_hdw_thread][i].enabled) {
				/*
				 * There are some per thread events.
				 * Must do the set event, enable_cntr
				 * for each cpu.
				 */
				enable_ctr(cpu, i,
					   pm_regs.pm07_cntrl);
			} else {
				cbe_write_pm07_control(cpu, i, 0);
			}
		}

		/* Enable interrupts on the CPU thread that is starting */
		cbe_enable_pm_interrupts(cpu, next_hdw_thread,
					 virt_cntr_inter_mask);
		cbe_enable_pm(cpu);
	}

	spin_unlock_irqrestore(&cntr_lock, flags);

	mod_timer(&timer_virt_cntr, jiffies + HZ / 10);
}

static void start_virt_cntrs(void)
{
	timer_setup(&timer_virt_cntr, cell_virtual_cntr, 0);
	timer_virt_cntr.expires = jiffies + HZ / 10;
	add_timer(&timer_virt_cntr);
}

static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr,
				     struct op_system_config *sys, int num_ctrs)
{
	spu_cycle_reset = ctr[0].count;

	/*
	 * Each node will need to make the rtas call to start
	 * and stop SPU profiling.  Get the token once and store it.
	 */
	spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");

	if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
		printk(KERN_ERR
		       "%s: rtas token ibm,cbe-spu-perftools unknown\n",
		       __func__);
		return -EIO;
	}
	return 0;
}
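/*
 * Unfortunately, the hardware will only support event profiling on one
 * SPU per node at a time.  Therefore, the profiling must be time sliced
 * across all of the SPUs in a node; this is done in parallel for each
 * node.  The following routine is called periodically by a kernel timer
 * to switch which SPU is being monitored, in a round robin fashion.
 */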
static void spu_evnt_swap(struct timer_list *unused)
{
	int node;
	int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx;
	unsigned long flags;
	int cpu;
	int ret;
	u32 interrupt_mask;

	/* enable interrupts for cntr 0 */
	interrupt_mask = CBE_PM_CTR_OVERFLOW_INTR(0);

	hdw_thread = 0;

	/* Make sure the SPU event interrupt handler and the SPU event
	 * swap don't access the counters simultaneously.
	 */
	spin_lock_irqsave(&cntr_lock, flags);

	cur_spu_evnt_phys_spu_indx = spu_evnt_phys_spu_indx;

	if (++(spu_evnt_phys_spu_indx) == NUM_SPUS_PER_NODE)
		spu_evnt_phys_spu_indx = 0;

	pm_signal[0].sub_unit = spu_evnt_phys_spu_indx;
	pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;
	pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;

	/* switch the SPU being profiled on each node */
	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		node = cbe_cpu_to_node(cpu);
		cur_phys_spu = (node * NUM_SPUS_PER_NODE)
			+ cur_spu_evnt_phys_spu_indx;
		nxt_phys_spu = (node * NUM_SPUS_PER_NODE)
			+ spu_evnt_phys_spu_indx;

		/*
		 * Stop the counters and save the counter value for
		 * the previous physical SPU.
		 */
		cbe_disable_pm(cpu);
		cbe_disable_pm_interrupts(cpu);

		spu_pm_cnt[cur_phys_spu]
			= cbe_read_ctr(cpu, 0);

		/* restore the previous count for the next SPU to profile */
		if (spu_pm_cnt[nxt_phys_spu] >= 0xFFFFFFFF)
			cbe_write_ctr(cpu, 0, 0xFFFFFFF0);
		else
			cbe_write_ctr(cpu, 0, spu_pm_cnt[nxt_phys_spu]);

		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));

		/*
		 * Setup the debug bus to measure the one event and
		 * the two events to route the next SPU's PC onto
		 * the debug bus.
		 */
		ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 3);
		if (ret)
			printk(KERN_ERR "%s: pm_rtas_activate_signals failed, "
			       "SPU event swap\n", __func__);

		/* Clear the trace buffer; don't want to take a PC for
		 * the previous SPU.
		 */
		cbe_write_pm(cpu, trace_address, 0);

		enable_ctr(cpu, 0, pm_regs.pm07_cntrl);

		/* Enable interrupts on the CPU thread that is starting */
		cbe_enable_pm_interrupts(cpu, hdw_thread,
					 interrupt_mask);
		cbe_enable_pm(cpu);
	}

	spin_unlock_irqrestore(&cntr_lock, flags);

	/* schedule the next SPU swap */
	mod_timer(&timer_spu_event_swap, jiffies + HZ / 25);
}

static void start_spu_event_swap(void)
{
	timer_setup(&timer_spu_event_swap, spu_evnt_swap, 0);
	timer_spu_event_swap.expires = jiffies + HZ / 25;
	add_timer(&timer_spu_event_swap);
}

static int cell_reg_setup_spu_events(struct op_counter_config *ctr,
				     struct op_system_config *sys, int num_ctrs)
{
	int i;

	/* routine is called once for all nodes */
	spu_evnt_phys_spu_indx = 0;

	/*
	 * For all events except PPU CYCLES, each node will need to make
	 * the rtas cbe-perftools call to setup and reset the debug bus.
	 * Make the token lookup call once and store it in the global
	 * variable pm_rtas_token.
	 */
	pm_rtas_token = rtas_token("ibm,cbe-perftools");

	if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
		printk(KERN_ERR
		       "%s: rtas token ibm,cbe-perftools unknown\n",
		       __func__);
		return -EIO;
	}

	/* Setup the pm_control register settings; the settings will
	 * be written per node by the cell_cpu_setup() function.
	 */
	pm_regs.pm_cntrl.trace_buf_ovflw = 1;

	/* Use the occurrence trace mode to have the SPU PC saved
	 * to the trace buffer.  Occurrence data in the trace buffer
	 * is not used.
	 */
	pm_regs.pm_cntrl.trace_mode = 2;

	pm_regs.pm_cntrl.spu_addr_trace = 0x1;	/* using debug bus
						   events 2 & 3 */

	/* Setup the debug bus event array with the SPU PC routing events.
	 * Note, pm_signal[0] will be filled in by the set_pm_event()
	 * call below.
	 */
	pm_signal[1].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
	pm_signal[1].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_A);
	pm_signal[1].bit = SPU_PROFILE_EVENT_ADDR % 100;
	pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;

	pm_signal[2].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
	pm_signal[2].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_B);
	pm_signal[2].bit = SPU_PROFILE_EVENT_ADDR % 100;
	pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;

	/* Set the user selected SPU event to profile on.
	 * Note, only one SPU profiling event is supported.
	 */
	num_counters = 1;
	set_pm_event(0, ctr[0].event, ctr[0].unit_mask);

	reset_value[0] = 0xFFFFFFFF - ctr[0].count;

	/* global, used by cell_global_start_spu_events() */
	ctr_enabled |= 1;

	/* Initialize the count for each SPU to the reset value */
	for (i = 0; i < MAX_NUMNODES * NUM_SPUS_PER_NODE; i++)
		spu_pm_cnt[i] = reset_value[0];

	return 0;
}

static int cell_reg_setup_ppu(struct op_counter_config *ctr,
			      struct op_system_config *sys, int num_ctrs)
{
	int i, j, cpu;

	num_counters = num_ctrs;

	if (unlikely(num_ctrs > NR_PHYS_CTRS)) {
		printk(KERN_ERR
		       "%s: Oprofile, number of specified events " \
		       "exceeds number of physical counters\n",
		       __func__);
		return -EIO;
	}

	set_count_mode(sys->enable_kernel, sys->enable_user);

	/* Setup the thread 0 events */
	for (i = 0; i < num_ctrs; ++i) {

		pmc_cntrl[0][i].evnts = ctr[i].event;
		pmc_cntrl[0][i].masks = ctr[i].unit_mask;
		pmc_cntrl[0][i].enabled = ctr[i].enabled;
		pmc_cntrl[0][i].vcntr = i;

		for_each_possible_cpu(j)
			per_cpu(pmc_values, j)[i] = 0;
	}

	/*
	 * Setup the thread 1 events, map the thread 0 event to the
	 * equivalent thread 1 event.
	 */
	for (i = 0; i < num_ctrs; ++i) {
		if ((ctr[i].event >= 2100) && (ctr[i].event <= 2111))
			pmc_cntrl[1][i].evnts = ctr[i].event + 19;
		else if (ctr[i].event == 2203)
			pmc_cntrl[1][i].evnts = ctr[i].event;
		else if ((ctr[i].event >= 2200) && (ctr[i].event <= 2215))
			pmc_cntrl[1][i].evnts = ctr[i].event + 16;
		else
			pmc_cntrl[1][i].evnts = ctr[i].event;

		pmc_cntrl[1][i].masks = ctr[i].unit_mask;
		pmc_cntrl[1][i].enabled = ctr[i].enabled;
		pmc_cntrl[1][i].vcntr = i;
	}

	for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
		input_bus[i] = 0xff;

	/*
	 * Our counters count up, and "count" refers to
	 * how much before the next interrupt, and we interrupt
	 * on overflow.  So we calculate the starting value
	 * which is the expected interval less the count value.
	 */
	for (i = 0; i < num_counters; ++i) {
		if (pmc_cntrl[0][i].enabled) {
			reset_value[i] = 0xFFFFFFFF - ctr[i].count;
			set_pm_event(i,
				     pmc_cntrl[0][i].evnts,
				     pmc_cntrl[0][i].masks);

			/* global, used by cell_global_start_ppu */
			ctr_enabled |= (1 << i);
		}
	}

	/* initialize the previous counts for the virtual cntrs */
	for_each_online_cpu(cpu)
		for (i = 0; i < num_counters; ++i) {
			per_cpu(pmc_values, cpu)[i] = reset_value[i];
		}

	return 0;
}

/* This function is called once for all cpus combined */
static int cell_reg_setup(struct op_counter_config *ctr,
			  struct op_system_config *sys, int num_ctrs)
{
	int ret = 0;

	spu_cycle_reset = 0;

	/* Initialize the performance monitor control register
	 * settings that are common to all profiling modes.
	 */
	pm_regs.group_control = 0;
	pm_regs.debug_bus_control = 0;
	pm_regs.pm_cntrl.stop_at_max = 1;
	pm_regs.pm_cntrl.trace_mode = 0;
	pm_regs.pm_cntrl.freeze = 1;
	pm_regs.pm_cntrl.trace_buf_ovflw = 0;
	pm_regs.pm_cntrl.spu_addr_trace = 0;

	/*
	 * For all events except PPU CYCLES, each node will need to make
	 * the rtas cbe-perftools call to setup and reset the debug bus.
	 * Make the token lookup call once and store it in the global
	 * variable pm_rtas_token.
	 */
	pm_rtas_token = rtas_token("ibm,cbe-perftools");

	if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
		printk(KERN_ERR
		       "%s: rtas token ibm,cbe-perftools unknown\n",
		       __func__);
		return -EIO;
	}

	if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
		profiling_mode = SPU_PROFILING_CYCLES;
		ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs);
	} else if ((ctr[0].event >= SPU_EVENT_NUM_START) &&
		   (ctr[0].event <= SPU_EVENT_NUM_STOP)) {
		profiling_mode = SPU_PROFILING_EVENTS;
		spu_cycle_reset = ctr[0].count;

		/*
		 * For SPU event profiling, we need to setup the
		 * pm_signal array with the events to route the
		 * SPU PC before making the FW call.  Note, only
		 * one SPU event for profiling can be specified
		 * at a time.
		 */
		ret = cell_reg_setup_spu_events(ctr, sys, num_ctrs);
	} else {
		profiling_mode = PPU_PROFILING;
		ret = cell_reg_setup_ppu(ctr, sys, num_ctrs);
	}

	return ret;
}

/* This function is called once for each cpu */
static int cell_cpu_setup(struct op_counter_config *cntr)
{
	u32 cpu = smp_processor_id();
	u32 num_enabled = 0;
	int i;
	int ret;

	/* Cycle based SPU profiling does not use the performance
	 * counters.  The trace array is configured to collect
	 * the data.
	 */
	if (profiling_mode == SPU_PROFILING_CYCLES)
		return 0;

	/* There is one performance monitor per processor chip (i.e. node),
	 * so we only need to perform this function once per node.
	 */
	if (cbe_get_hw_thread_id(cpu))
		return 0;

	/* Stop all counters */
	cbe_disable_pm(cpu);
	cbe_disable_pm_interrupts(cpu);

	cbe_write_pm(cpu, pm_start_stop, 0);
	cbe_write_pm(cpu, group_control, pm_regs.group_control);
	cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control);
	write_pm_cntrl(cpu);

	for (i = 0; i < num_counters; ++i) {
		if (ctr_enabled & (1 << i)) {
			pm_signal[num_enabled].cpu = cbe_cpu_to_node(cpu);
			num_enabled++;
		}
	}

	/*
	 * The pm_rtas_activate_signals will return -EIO if the FW
	 * call failed.
	 */
	if (profiling_mode == SPU_PROFILING_EVENTS) {
		/* Activate the two SPU address tracing signals in
		 * addition to the user selected SPU event.
		 */
		ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
					       num_enabled + 2);

		/* Store the PC from the debug bus to the trace buffer
		 * as often as possible (every 10 cycles).
		 */
		cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
		return ret;
	} else
		return pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
						num_enabled);
}

#define ENTRIES 303
#define MAXLFSR 0xFFFFFF

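/* precomputed table of 24 bit LFSR values */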
static int initial_lfsr[] = {
	8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
	15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
	4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
	3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
	9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026,
	2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556,
	3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769,
	14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893,
	11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017,
	6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756,
	15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558,
	7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401,
	16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720,
	15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042,
	15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955,
	10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934,
	3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783,
	3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278,
	8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051,
	8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741,
	4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972,
	16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302,
	2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384,
	14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469,
	1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697,
	6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398,
	10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140,
	10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214,
	14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386,
	7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087,
	9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130,
	14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300,
	13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475,
	5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950,
	3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003,
	6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
	7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
	6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
};
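/*
 * The hardware uses an LFSR counting sequence to determine when to capture
 * the SPU PCs.  An LFSR sequence is like a pseudo random number sequence
 * where each number occurs once in the sequence, but the sequence is not in
 * numerical order.  The SPU PC capture is done when the LFSR sequence reaches
 * the last value in the sequence; hence the user specified value N
 * corresponds to the LFSR number that is N from the end of the sequence.
 *
 * To avoid the time to compute the LFSR, a lookup table is used.  The 24 bit
 * LFSR sequence is broken into four ranges (see calculate_lfsr() below).
 * The spacing of the precomputed values is adjusted in each range so the
 * error between the user specified number (N) of events between samples and
 * the actual number of events based on the precomputed value is at most
 * about 6.25% (the worst-case step/range ratio, 2^12/2^16).  Note, if the
 * user specifies N < 2^16, the LFSR value that is 2^16 from the end is used;
 * this prevents the loss of samples because the trace buffer is full.
 *
 *	User specified N		Step between		Index in
 *					precomputed values	precomputed
 *								table
 * 0		   to 2^16-1			----		      0
 * 2^16		   to 2^16+2^19-1		2^12		    1 to 128
 * 2^16+2^19	   to 2^16+2^19+2^22-1		2^15		  129 to 256
 * 2^16+2^19+2^22  to 2^24-1			2^18		  257 to 302
 *
 * For example, the LFSR values in the second range are computed for 2^16,
 * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indices
 * 1, 2, ..., 127, 128.
 */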
#define V2_16 (0x1 << 16)
#define V2_19 (0x1 << 19)
#define V2_22 (0x1 << 22)

static int calculate_lfsr(int n)
{
	/*
	 * The ranges and steps are in powers of 2 so the calculations
	 * can be done using shifts rather than divide.
	 */
	int index;

	if ((n >> 16) == 0)
		index = 0;
	else if (((n - V2_16) >> 19) == 0)
		index = ((n - V2_16) >> 12) + 1;
	else if (((n - V2_16 - V2_19) >> 22) == 0)
		index = ((n - V2_16 - V2_19) >> 15) + 1 + 128;
	else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
		index = ((n - V2_16 - V2_19 - V2_22) >> 18) + 1 + 256;
	else
		index = ENTRIES - 1;

	/* make sure index is valid */
	if ((index >= ENTRIES) || (index < 0))
		index = ENTRIES - 1;

	return initial_lfsr[index];
}

static int pm_rtas_activate_spu_profiling(u32 node)
{
	int ret, i;
	struct pm_signal pm_signal_local[NUM_SPUS_PER_NODE];

	/*
	 * Set up the rtas call to configure the debug bus to
	 * route the SPU PCs.  Setup the pm_signal for each SPU.
	 */
	for (i = 0; i < ARRAY_SIZE(pm_signal_local); i++) {
		pm_signal_local[i].cpu = node;
		pm_signal_local[i].signal_group = 41;
		/* spu i on word (i/2) */
		pm_signal_local[i].bus_word = 1 << (i / 2);
		/* spu i */
		pm_signal_local[i].sub_unit = i;
		pm_signal_local[i].bit = 63;
	}

	ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE,
				     PASSTHRU_ENABLE, pm_signal_local,
				     (ARRAY_SIZE(pm_signal_local)
				      * sizeof(struct pm_signal)));

	if (unlikely(ret)) {
		printk(KERN_WARNING "%s: rtas returned: %d\n",
		       __func__, ret);
		return -EIO;
	}

	return 0;
}

#ifdef CONFIG_CPU_FREQ
static int
oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data)
{
	int ret = 0;
	struct cpufreq_freqs *frq = data;

	if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) ||
	    (val == CPUFREQ_POSTCHANGE && frq->old > frq->new))
		set_spu_profiling_frequency(frq->new, spu_cycle_reset);
	return ret;
}

static struct notifier_block cpu_freq_notifier_block = {
	.notifier_call = oprof_cpufreq_notify
};
#endif
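/*
 * Note the generic OProfile stop calls do not support returning
 * an error on stop.  Hence, no error is returned if the FW
 * calls fail on stop.  Failure to reset the debug bus is not an issue.
 * Failure to disable the SPU profiling is not an issue.  The FW calls
 * to enable the performance counters and debug bus will work even if
 * the hardware was not cleanly reset.
 */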
static void cell_global_stop_spu_cycles(void)
{
	int subfunc, rtn_value;
	unsigned int lfsr_value;
	int cpu;

	oprofile_running = 0;
	smp_wmb();

#ifdef CONFIG_CPU_FREQ
	cpufreq_unregister_notifier(&cpu_freq_notifier_block,
				    CPUFREQ_TRANSITION_NOTIFIER);
#endif

	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		subfunc = 3;	/*
				 * 2 - activate SPU tracing,
				 * 3 - deactivate
				 */
		lfsr_value = 0x8f100000;

		rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
				      subfunc, cbe_cpu_to_node(cpu),
				      lfsr_value);

		if (unlikely(rtn_value != 0)) {
			printk(KERN_ERR
			       "%s: rtas call ibm,cbe-spu-perftools " \
			       "failed, return = %d\n",
			       __func__, rtn_value);
		}

		/* Deactivate the signals */
		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
	}

	stop_spu_profiling_cycles();
}

static void cell_global_stop_spu_events(void)
{
	int cpu;

	oprofile_running = 0;

	stop_spu_profiling_events();
	smp_wmb();

	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		cbe_sync_irq(cbe_cpu_to_node(cpu));

		cbe_disable_pm(cpu);
		cbe_write_pm07_control(cpu, 0, 0);

		/* Deactivate the signals */
		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));

		/* Deactivate interrupts */
		cbe_disable_pm_interrupts(cpu);
	}
	del_timer_sync(&timer_spu_event_swap);
}

static void cell_global_stop_ppu(void)
{
	int cpu;

	/*
	 * This routine will be called once for the system.
	 * There is one performance monitor per node, so we
	 * only need to perform this function once per node.
	 */
	del_timer_sync(&timer_virt_cntr);
	oprofile_running = 0;
	smp_wmb();

	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		cbe_sync_irq(cbe_cpu_to_node(cpu));

		cbe_disable_pm(cpu);

		/* Deactivate the signals */
		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));

		/* Deactivate interrupts */
		cbe_disable_pm_interrupts(cpu);
	}
}

static void cell_global_stop(void)
{
	if (profiling_mode == PPU_PROFILING)
		cell_global_stop_ppu();
	else if (profiling_mode == SPU_PROFILING_EVENTS)
		cell_global_stop_spu_events();
	else
		cell_global_stop_spu_cycles();
}

static int cell_global_start_spu_cycles(struct op_counter_config *ctr)
{
	int subfunc;
	unsigned int lfsr_value;
	int cpu;
	int ret;
	int rtas_error;
	unsigned int cpu_khzfreq = 0;

	/* The SPU profiling uses time-based profiling based on
	 * cpu frequency, so if configured with the CPU_FREQ
	 * option, we should detect frequency changes and react
	 * accordingly.
	 */
#ifdef CONFIG_CPU_FREQ
	ret = cpufreq_register_notifier(&cpu_freq_notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);
	if (ret < 0)
		/* this is not a fatal error */
		printk(KERN_ERR "CPU freq change registration failed: %d\n",
		       ret);
	else
		cpu_khzfreq = cpufreq_quick_get(smp_processor_id());
#endif

	set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset);

	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		/*
		 * Setup SPU cycle-based profiling.
		 * Set perf_mon_control bit 0 to a zero before
		 * enabling spu collection hardware.
		 */
		cbe_write_pm(cpu, pm_control, 0);

		if (spu_cycle_reset > MAX_SPU_COUNT)
			/* use largest possible value */
			lfsr_value = calculate_lfsr(MAX_SPU_COUNT - 1);
		else
			lfsr_value = calculate_lfsr(spu_cycle_reset);

		/* must use a non zero value. Zero disables data collection. */
		if (lfsr_value == 0)
			lfsr_value = calculate_lfsr(1);

		lfsr_value = lfsr_value << 8;	/* shift lfsr to correct
						 * register location
						 */

		/* debug bus setup */
		ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu));

		if (unlikely(ret)) {
			rtas_error = ret;
			goto out;
		}

		subfunc = 2;	/* 2 - activate SPU tracing, 3 - deactivate */

		/* start profiling */
		ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
				cbe_cpu_to_node(cpu), lfsr_value);

		if (unlikely(ret != 0)) {
			printk(KERN_ERR
			       "%s: rtas call ibm,cbe-spu-perftools failed, " \
			       "return = %d\n", __func__, ret);
			rtas_error = -EIO;
			goto out;
		}
	}

	rtas_error = start_spu_profiling_cycles(spu_cycle_reset);
	if (rtas_error)
		goto out_stop;

	oprofile_running = 1;
	return 0;

out_stop:
	cell_global_stop_spu_cycles();	/* clean up the PMU/debug bus */
out:
	return rtas_error;
}

static int cell_global_start_spu_events(struct op_counter_config *ctr)
{
	int cpu;
	u32 interrupt_mask = 0;
	int rtn = 0;

	hdw_thread = 0;

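	/*
	 * SPU event profiling uses counter 0 to count the selected event
	 * on one SPU per node at a time; the spu_evnt_swap() timer rotates
	 * which physical SPU is being profiled.  This routine is called
	 * once for the system.  There is one performance monitor per node,
	 * so the setup only needs to be done once per node.
	 */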
	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		/*
		 * Setup SPU event-based profiling.
		 * Set up counter 0 to count the user selected SPU event.
		 * Note, only one SPU event on one SPU per node is
		 * supported at a time.
		 */
		if (ctr_enabled & 1) {
			cbe_write_ctr(cpu, 0, reset_value[0]);
			enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
			interrupt_mask |=
				CBE_PM_CTR_OVERFLOW_INTR(0);
		} else {
			/* Disable counter */
			cbe_write_pm07_control(cpu, 0, 0);
		}

		cbe_get_and_clear_pm_interrupts(cpu);
		cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask);
		cbe_enable_pm(cpu);

		/* clear the trace buffer */
		cbe_write_pm(cpu, trace_address, 0);
	}

	/* Start the timer to time slice collecting the event profile
	 * on each of the SPUs.  Must start the timer before the SPU
	 * profiling is started, as the SPU profiling routines check to
	 * make sure the timer is running.
	 */
	start_spu_event_swap();
	start_spu_profiling_events();
	oprofile_running = 1;
	smp_wmb();

	return rtn;
}

static int cell_global_start_ppu(struct op_counter_config *ctr)
{
	u32 cpu, i;
	u32 interrupt_mask = 0;

	/* This routine gets called once for the system.
	 * There is one performance monitor per node, so we
	 * only need to perform this function once per node.
	 */
	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		interrupt_mask = 0;

		for (i = 0; i < num_counters; ++i) {
			if (ctr_enabled & (1 << i)) {
				cbe_write_ctr(cpu, i, reset_value[i]);
				enable_ctr(cpu, i, pm_regs.pm07_cntrl);
				interrupt_mask |= CBE_PM_CTR_OVERFLOW_INTR(i);
			} else {
				/* Disable counter */
				cbe_write_pm07_control(cpu, i, 0);
			}
		}

		cbe_get_and_clear_pm_interrupts(cpu);
		cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask);
		cbe_enable_pm(cpu);
	}

	virt_cntr_inter_mask = interrupt_mask;
	oprofile_running = 1;
	smp_wmb();

	/*
	 * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
	 * executed which manipulates the PMU.  We start the "virtual counter"
	 * here so that we do not need to synchronize access to the PMU in
	 * the above for-loop.
	 */
	start_virt_cntrs();

	return 0;
}

static int cell_global_start(struct op_counter_config *ctr)
{
	if (profiling_mode == SPU_PROFILING_CYCLES)
		return cell_global_start_spu_cycles(ctr);
	else if (profiling_mode == SPU_PROFILING_EVENTS)
		return cell_global_start_spu_events(ctr);
	else
		return cell_global_start_ppu(ctr);
}
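/*
 * When SPU event profiling is enabled, the performance monitor interrupt
 * fires when counter 0 overflows for the SPU event being counted.  The
 * handler drains the hardware trace buffer to recover the most recent
 * SPU program counter, records the sample against the physical SPU
 * currently being profiled, and then restarts the counter, the interval
 * timer and the trace hardware.
 */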
static void cell_handle_interrupt_spu(struct pt_regs *regs,
				      struct op_counter_config *ctr)
{
	u32 cpu, cpu_tmp;
	u64 trace_entry;
	u32 interrupt_mask;
	u64 trace_buffer[2];
	u64 last_trace_buffer;
	u32 sample;
	u32 trace_addr;
	unsigned long sample_array_lock_flags;
	int spu_num;
	unsigned long flags;

	/* Make sure the SPU event interrupt handler and the SPU event
	 * swap don't access the counters simultaneously.
	 */
	cpu = smp_processor_id();
	spin_lock_irqsave(&cntr_lock, flags);

	cpu_tmp = cpu;
	cbe_disable_pm(cpu);

	interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);

	sample = 0xABCDEF;
	trace_entry = 0xfedcba;
	last_trace_buffer = 0xdeadbeaf;

	if ((oprofile_running == 1) && (interrupt_mask != 0)) {
		/* disable writes to trace buff */
		cbe_write_pm(cpu, pm_interval, 0);

		/* only have one perf cntr being used, cntr 0 */
		if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(0))
		    && ctr[0].enabled)
			/* The SPU PC values will be read
			 * from the trace buffer, reset counter.
			 */
			cbe_write_ctr(cpu, 0, reset_value[0]);

		trace_addr = cbe_read_pm(cpu, trace_address);

		while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
			/* There is data in the trace buffer to process;
			 * read the buffer until it is empty, keeping the
			 * most recent entry.
			 */
			cbe_read_trace_buffer(cpu, trace_buffer);
			trace_addr = cbe_read_pm(cpu, trace_address);
		}

		/* SPU Address 16 bit count format for 128 bit
		 * HW trace buffer is used for the SPU PC storage
		 *    HDR bits          0:15
		 *    SPU Addr 0 bits   16:31
		 *    SPU Addr 1 bits   32:47
		 *    unused bits       48:127
		 *
		 * HDR: bit4 = 1 SPU addr 0 valid
		 *      bit5 = 1 SPU addr 1 valid
		 *  - unfortunately, the valid bits are not working
		 *
		 * Note: trace_buffer[0] holds bits 0:63 of the 128
		 *       bit trace buffer.
		 */
		trace_entry = trace_buffer[0]
			& 0x00000000FFFF0000;

		/* The stored 16 bit value is the SPU PC shifted right
		 * by two (word aligned); shifting the masked entry
		 * right by 14 restores the byte address.
		 */
		sample = trace_entry >> 14;
		last_trace_buffer = trace_buffer[0];

		spu_num = spu_evnt_phys_spu_indx
			+ (cbe_cpu_to_node(cpu) * NUM_SPUS_PER_NODE);

		/* Process the SPU PC sample; the sample array is
		 * shared with the SPU profiling code.
		 */
		spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
				  sample_array_lock_flags);
		spu_sync_buffer(spu_num, &sample, 1);
		spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
				       sample_array_lock_flags);

		smp_wmb();	/* insure spu event buffer updates are written */

		/* The counters were frozen by the interrupt.
		 * Reenable the interrupt and restart the counters.
		 */
		cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
		cbe_enable_pm_interrupts(cpu, hdw_thread,
					 virt_cntr_inter_mask);

		/* clear the trace buffer, re-enable writes to trace buff */
		cbe_write_pm(cpu, trace_address, 0);
		cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);

		/* The writes to the various performance counters only
		 * write to a latch.  The new values (interrupt setting
		 * bits, reset counter value etc.) are not copied to the
		 * actual registers until the performance monitor is
		 * enabled.  In order to get this to work as desired, the
		 * performance monitor needs to be disabled while writing
		 * to the latches.  This is a HW design issue.
		 */
		write_pm_cntrl(cpu);
		cbe_enable_pm(cpu);
	}
	spin_unlock_irqrestore(&cntr_lock, flags);
}

static void cell_handle_interrupt_ppu(struct pt_regs *regs,
				      struct op_counter_config *ctr)
{
	u32 cpu;
	u64 pc;
	int is_kernel;
	unsigned long flags = 0;
	u32 interrupt_mask;
	int i;

	cpu = smp_processor_id();

	/*
	 * Need to make sure the interrupt handler and the virt counter
	 * routine are not running at the same time. See the
	 * cell_virtual_cntr() routine for details.
	 */
	spin_lock_irqsave(&cntr_lock, flags);

	/*
	 * Need to disable and reenable the performance counters
	 * to get the desired behavior from the hardware.  This
	 * is hardware specific.
	 */
	cbe_disable_pm(cpu);

	interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);

	/*
	 * If the interrupt mask has been cleared, then the virt cntr
	 * has cleared the interrupt.  When the thread that generated
	 * the interrupt is restored, the data count will be restored to
	 * 0xffffff0 to cause the interrupt to be regenerated.
	 */
	if ((oprofile_running == 1) && (interrupt_mask != 0)) {
		pc = regs->nip;
		is_kernel = is_kernel_addr(pc);

		for (i = 0; i < num_counters; ++i) {
			if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(i))
			    && ctr[i].enabled) {
				oprofile_add_ext_sample(pc, regs, i, is_kernel);
				cbe_write_ctr(cpu, i, reset_value[i]);
			}
		}

		/*
		 * The counters were frozen by the interrupt.
		 * Reenable the interrupt and restart the counters.
		 * There may have been a race between the interrupt
		 * handler and the virtual counter routine; the virtual
		 * counter routine may have cleared the interrupts.  Hence
		 * we use the virt_cntr_inter_mask to re-enable the
		 * interrupts.
		 */
		cbe_enable_pm_interrupts(cpu, hdw_thread,
					 virt_cntr_inter_mask);

		/*
		 * The writes to the various performance counters only
		 * write to a latch.  The new values (interrupt setting
		 * bits, reset counter value etc.) are not copied to the
		 * actual registers until the performance monitor is
		 * enabled.  In order to get this to work as desired, the
		 * performance monitor needs to be disabled while writing
		 * to the latches.  This is a HW design issue.
		 */
		cbe_enable_pm(cpu);
	}
	spin_unlock_irqrestore(&cntr_lock, flags);
}

static void cell_handle_interrupt(struct pt_regs *regs,
				  struct op_counter_config *ctr)
{
	if (profiling_mode == PPU_PROFILING)
		cell_handle_interrupt_ppu(regs, ctr);
	else
		cell_handle_interrupt_spu(regs, ctr);
}

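/*
 * This function is called from the generic OProfile
 * driver.  When profiling PPUs, we need to do the
 * generic sync start; otherwise, do spu_sync_start.
 */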
static int cell_sync_start(void)
{
	if ((profiling_mode == SPU_PROFILING_CYCLES) ||
	    (profiling_mode == SPU_PROFILING_EVENTS))
		return spu_sync_start();
	else
		return DO_GENERIC_SYNC;
}

static int cell_sync_stop(void)
{
	if ((profiling_mode == SPU_PROFILING_CYCLES) ||
	    (profiling_mode == SPU_PROFILING_EVENTS))
		return spu_sync_stop();
	else
		return 1;
}

struct op_powerpc_model op_model_cell = {
	.reg_setup = cell_reg_setup,
	.cpu_setup = cell_cpu_setup,
	.global_start = cell_global_start,
	.global_stop = cell_global_stop,
	.sync_start = cell_sync_start,
	.sync_stop = cell_sync_stop,
	.handle_interrupt = cell_handle_interrupt,
};