/*
 * Cell Broadband Engine OProfile Support
 *
 * (C) Copyright IBM Corporation 2006
 *
 * Author: David Erb (djerb@us.ibm.com)
 * Modifications:
 *	   Carl Love <carll@us.ibm.com>
 *	   Maynard Johnson <maynardj@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/cpufreq.h>
#include <linux/delay.h>
#include <linux/jiffies.h>
#include <linux/kthread.h>
#include <linux/oprofile.h>
#include <linux/percpu.h>
#include <linux/smp.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <asm/cell-pmu.h>
#include <asm/cputable.h>
#include <asm/firmware.h>
#include <asm/io.h>
#include <asm/oprofile_impl.h>
#include <asm/processor.h>
#include <asm/prom.h>
#include <asm/ptrace.h>
#include <asm/reg.h>
#include <asm/rtas.h>
#include <asm/cell-regs.h>

#include "../platforms/cell/interrupt.h"
#include "cell/pr_util.h"

#define PPU_PROFILING		0
#define SPU_PROFILING_CYCLES	1
#define SPU_PROFILING_EVENTS	2

#define SPU_EVENT_NUM_START	4100
#define SPU_EVENT_NUM_STOP	4399
#define SPU_PROFILE_EVENT_ADDR		4363	/* SPU address trace event */
#define SPU_PROFILE_EVENT_ADDR_MASK_A	0x146	/* sub unit set to zero */
#define SPU_PROFILE_EVENT_ADDR_MASK_B	0x186	/* sub unit set to zero */

#define NUM_SPUS_PER_NODE	8
#define SPU_CYCLES_EVENT_NUM	2	/* event number for SPU_CYCLES */

#define PPU_CYCLES_EVENT_NUM	1	/* event number for CYCLES */
#define PPU_CYCLES_GRP_NUM	1	/* special group number for identifying
					 * the PPU_CYCLES event
					 */
#define CBE_COUNT_ALL_CYCLES	0x42800000	/* PPU cycle event specifier */

#define NUM_THREADS		2	/* number of physical threads in
					 * physical processor
					 */
#define NUM_DEBUG_BUS_WORDS	4
#define NUM_INPUT_BUS_WORDS	2

#define MAX_SPU_COUNT		0xFFFFFF	/* maximum 24 bit LFSR value */

/*
 * The minimum interval timer setting for storing to the trace buffer is
 * 10 cycles.  To store a value to the trace buffer every N cycles, the
 * interval register must be set to 2^32 - 1 - N.
 */
#define NUM_INTERVAL_CYC	(0xFFFFFFFF - 10)
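
/*
 * spu_cycle_reset is the number of cycles between samples.  It is only
 * meaningful for the SPU profiling modes and must be set before profiling
 * is started; after that it is effectively read-only.
 */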
static unsigned int spu_cycle_reset;
static unsigned int profiling_mode;
static int spu_evnt_phys_spu_indx;

struct pmc_cntrl_data {
	unsigned long vcntr;
	unsigned long evnts;
	unsigned long masks;
	unsigned long enabled;
};
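
/*
 * ibm,cbe-perftools rtas parameters
 */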
struct pm_signal {
	u16 cpu;		/* Processor to modify */
	u16 sub_unit;		/* hw subunit this applies to (if applicable) */
	short int signal_group;	/* Signal Group to Enable/Disable */
	u8 bus_word;		/* Enable/Disable on this Trace/Trigger/Event
				 * Bus Word(s) (bitmask)
				 */
	u8 bit;			/* Trigger/Event bit (if applicable) */
};

/*
 * Subfunction and passthru arguments for the ibm,cbe-perftools rtas call
 */
enum {
	SUBFUNC_RESET = 1,
	SUBFUNC_ACTIVATE = 2,
	SUBFUNC_DEACTIVATE = 3,

	PASSTHRU_IGNORE = 0,
	PASSTHRU_ENABLE = 1,
	PASSTHRU_DISABLE = 2,
};

struct pm_cntrl {
	u16 enable;
	u16 stop_at_max;
	u16 trace_mode;
	u16 freeze;
	u16 count_mode;
	u16 spu_addr_trace;
	u8 trace_buf_ovflw;
};

static struct {
	u32 group_control;
	u32 debug_bus_control;
	struct pm_cntrl pm_cntrl;
	u32 pm07_cntrl[NR_PHYS_CTRS];
} pm_regs;

#define GET_SUB_UNIT(x) ((x & 0x0000f000) >> 12)
#define GET_BUS_WORD(x) ((x & 0x000000f0) >> 4)
#define GET_BUS_TYPE(x) ((x & 0x00000300) >> 8)
#define GET_POLARITY(x) ((x & 0x00000002) >> 1)
#define GET_COUNT_CYCLES(x) (x & 0x00000001)
#define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2)

static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
static unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE];
static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
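/*
 * The Cell profiling code makes rtas calls to set up the debug bus to
 * route the performance signals.  Additionally, SPU profiling requires
 * a second rtas call to set up the hardware to capture the SPU PCs.
 * -EIO is returned if a token lookup or an rtas call fails.  Such
 * failures are rare, but profiling cannot work without the calls, so
 * the error is reported to the user.
 */

/*
 * Interpretation of hdw_thread:
 * 0 - even virtual cpus 0, 2, 4, ...
 * 1 - odd virtual cpus 1, 3, 5, ...
 */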
static u32 hdw_thread;

static u32 virt_cntr_inter_mask;
static struct timer_list timer_virt_cntr;
static struct timer_list timer_spu_event_swap;

/*
 * pm_signal needs to be global since it is initialized in cell_reg_setup,
 * which runs when the necessary information first becomes available.
 */
static struct pm_signal pm_signal[NR_PHYS_CTRS];
static int pm_rtas_token;	/* token for debug bus setup call */
static int spu_rtas_token;	/* token for SPU cycle profiling call */

static u32 reset_value[NR_PHYS_CTRS];
static int num_counters;
static int oprofile_running;
static DEFINE_SPINLOCK(cntr_lock);

static u32 ctr_enabled;

static unsigned char input_bus[NUM_INPUT_BUS_WORDS];
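
/*
 * Firmware interface functions
 */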
static int
rtas_ibm_cbe_perftools(int subfunc, int passthru,
		       void *address, unsigned long length)
{
	u64 paddr = __pa(address);

	return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc,
			 passthru, paddr >> 32, paddr & 0xffffffff, length);
}

static void pm_rtas_reset_signals(u32 node)
{
	int ret;
	struct pm_signal pm_signal_local;

	/*
	 * The debug bus is being set to the passthru disable state.
	 * However, the FW still expects at least one legal signal routing
	 * entry or it will return an error on the arguments.  If we don't
	 * supply a valid entry, we must ignore all return values.  Ignoring
	 * all return values means we might miss an error we should be
	 * concerned about.
	 */

	/* fw expects physical cpu #. */
	pm_signal_local.cpu = node;
	pm_signal_local.signal_group = 21;
	pm_signal_local.bus_word = 1;
	pm_signal_local.sub_unit = 0;
	pm_signal_local.bit = 0;

	ret = rtas_ibm_cbe_perftools(SUBFUNC_RESET, PASSTHRU_DISABLE,
				     &pm_signal_local,
				     sizeof(struct pm_signal));

	if (unlikely(ret))
		/*
		 * Not a fatal error.  For OProfile to function correctly,
		 * we were only trying to clean up the performance
		 * monitoring hardware.
		 */
		printk(KERN_WARNING "%s: rtas returned: %d\n",
		       __func__, ret);
}

static int pm_rtas_activate_signals(u32 node, u32 count)
{
	int ret;
	int i, j;
	struct pm_signal pm_signal_local[NR_PHYS_CTRS];

	/*
	 * There is no debug setup required for the cycles event.
	 * Note that only events in the same group can be used.
	 * Otherwise, there will be conflicts in correctly routing
	 * the signals on the debug bus.  It is the responsibility
	 * of the OProfile user tool to check that the events are
	 * in the same group.
	 */
	i = 0;
	for (j = 0; j < count; j++) {
		if (pm_signal[j].signal_group != PPU_CYCLES_GRP_NUM) {

			/* fw expects physical cpu # */
			pm_signal_local[i].cpu = node;
			pm_signal_local[i].signal_group
				= pm_signal[j].signal_group;
			pm_signal_local[i].bus_word = pm_signal[j].bus_word;
			pm_signal_local[i].sub_unit = pm_signal[j].sub_unit;
			pm_signal_local[i].bit = pm_signal[j].bit;
			i++;
		}
	}

	if (i != 0) {
		ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE,
					     pm_signal_local,
					     i * sizeof(struct pm_signal));

		if (unlikely(ret)) {
			printk(KERN_WARNING "%s: rtas returned: %d\n",
			       __func__, ret);
			return -EIO;
		}
	}

	return 0;
}
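
/*
 * Fill in the pm_signal entry and the pm07 counter control word for the
 * given counter from the event number and unit mask supplied by the user.
 */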
static void set_pm_event(u32 ctr, int event, u32 unit_mask)
{
	struct pm_signal *p;
	u32 signal_bit;
	u32 bus_word, bus_type, count_cycles, polarity, input_control;
	int j, i;

	if (event == PPU_CYCLES_EVENT_NUM) {
		/* Special Event: Count all cpu cycles */
		pm_regs.pm07_cntrl[ctr] = CBE_COUNT_ALL_CYCLES;
		p = &(pm_signal[ctr]);
		p->signal_group = PPU_CYCLES_GRP_NUM;
		p->bus_word = 1;
		p->sub_unit = 0;
		p->bit = 0;
		goto out;
	} else {
		pm_regs.pm07_cntrl[ctr] = 0;
	}

	bus_word = GET_BUS_WORD(unit_mask);
	bus_type = GET_BUS_TYPE(unit_mask);
	count_cycles = GET_COUNT_CYCLES(unit_mask);
	polarity = GET_POLARITY(unit_mask);
	input_control = GET_INPUT_CONTROL(unit_mask);
	signal_bit = (event % 100);

	p = &(pm_signal[ctr]);

	p->signal_group = event / 100;
	p->bus_word = bus_word;
	p->sub_unit = GET_SUB_UNIT(unit_mask);

	pm_regs.pm07_cntrl[ctr] = 0;
	pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles);
	pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
	pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);

	/*
	 * Some of the islands signal selection is based on 64 bit words.
	 * The debug bus words are 32 bits, the input words to the performance
	 * counters are defined as 32 bits.  Need to convert the 64 bit island
	 * specification to the appropriate 32 input bit and bus word for the
	 * performance counter event selection.  See the CELL Performance
	 * monitoring signals manual and the Perf cntr hardware descriptions
	 * for the details.
	 */
	if (input_control == 0) {
		if (signal_bit > 31) {
			signal_bit -= 32;
			if (bus_word == 0x3)
				bus_word = 0x2;
			else if (bus_word == 0xc)
				bus_word = 0x8;
		}

		if ((bus_type == 0) && p->signal_group >= 60)
			bus_type = 2;
		if ((bus_type == 1) && p->signal_group >= 50)
			bus_type = 0;

		pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_MUX(signal_bit);
	} else {
		pm_regs.pm07_cntrl[ctr] = 0;
		p->bit = signal_bit;
	}

	for (i = 0; i < NUM_DEBUG_BUS_WORDS; i++) {
		if (bus_word & (1 << i)) {
			pm_regs.debug_bus_control |=
				(bus_type << (30 - (2 * i)));

			for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) {
				if (input_bus[j] == 0xff) {
					input_bus[j] = i;
					pm_regs.group_control |=
						(i << (30 - (2 * j)));

					break;
				}
			}
		}
	}
out:
	;
}

static void write_pm_cntrl(int cpu)
{
	/*
	 * Oprofile will use 32 bit counters, set bits 7:10 to 0.
	 * pm_regs.pm_cntrl is a global.
	 */
	u32 val = 0;

	if (pm_regs.pm_cntrl.enable == 1)
		val |= CBE_PM_ENABLE_PERF_MON;

	if (pm_regs.pm_cntrl.stop_at_max == 1)
		val |= CBE_PM_STOP_AT_MAX;

	if (pm_regs.pm_cntrl.trace_mode != 0)
		val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode);

	if (pm_regs.pm_cntrl.trace_buf_ovflw == 1)
		val |= CBE_PM_TRACE_BUF_OVFLW(pm_regs.pm_cntrl.trace_buf_ovflw);

	if (pm_regs.pm_cntrl.freeze == 1)
		val |= CBE_PM_FREEZE_ALL_CTRS;

	val |= CBE_PM_SPU_ADDR_TRACE_SET(pm_regs.pm_cntrl.spu_addr_trace);

	/*
	 * Routine set_count_mode must be called previously to set
	 * the count mode based on the user selection of user and kernel.
	 */
	val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
	cbe_write_pm(cpu, pm_control, val);
}

static inline void
set_count_mode(u32 kernel, u32 user)
{
	/*
	 * The user must specify user and kernel if they want them. If
	 * neither is specified, OProfile will count in hypervisor mode.
	 * pm_regs.pm_cntrl is a global.
	 */
	if (kernel) {
		if (user)
			pm_regs.pm_cntrl.count_mode = CBE_COUNT_ALL_MODES;
		else
			pm_regs.pm_cntrl.count_mode =
				CBE_COUNT_SUPERVISOR_MODE;
	} else {
		if (user)
			pm_regs.pm_cntrl.count_mode = CBE_COUNT_PROBLEM_MODE;
		else
			pm_regs.pm_cntrl.count_mode =
				CBE_COUNT_HYPERVISOR_MODE;
	}
}

static inline void enable_ctr(u32 cpu, u32 ctr, u32 *pm07_cntrl)
{
	pm07_cntrl[ctr] |= CBE_PM_CTR_ENABLE;
	cbe_write_pm07_control(cpu, ctr, pm07_cntrl[ctr]);
}
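/*
 * OProfile is expected to collect data on all CPUs simultaneously.
 * However, there is one set of performance counters per node.  There are
 * two hardware threads or virtual CPUs on each node.  Hence, OProfile must
 * multiplex in time the performance counter collection on the two virtual
 * CPUs.  The multiplexing of the performance counters is done by this
 * virtual counter routine.
 *
 * The pmc_values used below is defined as 'per-cpu' but its use is
 * more akin to 'per-node'.  We need to store two sets of counter
 * values per node -- one for the previous and one for the next
 * thread to run.  The per-cpu[NR_PHYS_CTRS] gives us the storage; each
 * odd/even pair of per-cpu arrays holds the previous and next pmc values
 * for a given node.
 *
 * This routine alternates loading the virtual counters for the two
 * virtual CPUs.
 */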
static void cell_virtual_cntr(unsigned long data)
{
	int i, prev_hdw_thread, next_hdw_thread;
	u32 cpu;
	unsigned long flags;

	/*
	 * Make sure that the interrupt handler and the virt counter are
	 * not both playing with the counters on the same node.
	 */
	spin_lock_irqsave(&cntr_lock, flags);

	prev_hdw_thread = hdw_thread;

	/* switch the cpu handling the interrupts */
	hdw_thread = 1 ^ hdw_thread;
	next_hdw_thread = hdw_thread;

	pm_regs.group_control = 0;
	pm_regs.debug_bus_control = 0;

	/* Reset the debug bus input tracking */
	for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
		input_bus[i] = 0xff;

	/*
	 * Set up the counter control and debug bus routing for the
	 * events of the thread that is about to run.
	 */
	for (i = 0; i < num_counters; i++)
		set_pm_event(i,
			     pmc_cntrl[next_hdw_thread][i].evnts,
			     pmc_cntrl[next_hdw_thread][i].masks);

	/*
	 * The following is done only once per each node, but
	 * we need cpu #, not node #, to pass to the cbe_xxx functions.
	 */
	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		/*
		 * stop both threads for viewing
		 */
		cbe_disable_pm(cpu);
		cbe_disable_pm_interrupts(cpu);

		/* Save the counters of the current thread and load the
		 * counters of the thread that is about to run.
		 */
		for (i = 0; i < num_counters; i++) {
			per_cpu(pmc_values, cpu + prev_hdw_thread)[i]
				= cbe_read_ctr(cpu, i);

			if (per_cpu(pmc_values, cpu + next_hdw_thread)[i]
			    == 0xFFFFFFFF)
				/*
				 * If the cntr value is 0xffffffff, we must
				 * reset that to 0xfffffff0 when the current
				 * thread is restarted.  This will generate a
				 * new interrupt and make sure that we never
				 * restore the counters to the max value.  If
				 * the counters were restored to the max
				 * value, they do not increment and no
				 * interrupts are generated.  Hence no more
				 * samples will be collected on that cpu.
				 */
				cbe_write_ctr(cpu, i, 0xFFFFFFF0);
			else
				cbe_write_ctr(cpu, i,
					      per_cpu(pmc_values,
						      cpu +
						      next_hdw_thread)[i]);
		}

		/*
		 * Switch to the other thread.  Change the interrupt
		 * and control regs to be scheduled on the CPU
		 * corresponding to the thread to execute.
		 */
		for (i = 0; i < num_counters; i++) {
			if (pmc_cntrl[next_hdw_thread][i].enabled) {
				/*
				 * There are some per thread events.
				 * Must do the set event, enable_cntr
				 * for each cpu.
				 */
				enable_ctr(cpu, i,
					   pm_regs.pm07_cntrl);
			} else {
				cbe_write_pm07_control(cpu, i, 0);
			}
		}

		/* Enable interrupts on the CPU thread that is starting */
		cbe_enable_pm_interrupts(cpu, next_hdw_thread,
					 virt_cntr_inter_mask);
		cbe_enable_pm(cpu);
	}

	spin_unlock_irqrestore(&cntr_lock, flags);

	mod_timer(&timer_virt_cntr, jiffies + HZ / 10);
}

static void start_virt_cntrs(void)
{
	init_timer(&timer_virt_cntr);
	timer_virt_cntr.function = cell_virtual_cntr;
	timer_virt_cntr.data = 0UL;
	timer_virt_cntr.expires = jiffies + HZ / 10;
	add_timer(&timer_virt_cntr);
}

static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr,
				     struct op_system_config *sys, int num_ctrs)
{
	spu_cycle_reset = ctr[0].count;

	/*
	 * Each node will need to make the rtas call to start
	 * and stop SPU profiling.  Get the token once and store it.
	 */
	spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");

	if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
		printk(KERN_ERR
		       "%s: rtas token ibm,cbe-spu-perftools unknown\n",
		       __func__);
		return -EIO;
	}
	return 0;
}
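/* Unfortunately, the hardware will only support event profiling
 * on one SPU per node at a time.  Therefore, we must time slice
 * the profiling across all SPUs in the node.  Note, we do this
 * in parallel for each node.  The following routine is called
 * periodically based on a kernel timer to switch which SPU is
 * being monitored in a round robin fashion.
 */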
static void spu_evnt_swap(unsigned long data)
{
	int node;
	int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx;
	unsigned long flags;
	int cpu;
	int ret;
	u32 interrupt_mask;

	/* enable interrupts on cntr 0 */
	interrupt_mask = CBE_PM_CTR_OVERFLOW_INTR(0);

	hdw_thread = 0;

	/* Make sure the SPU event interrupt handler and the SPU event
	 * swap don't access the counters simultaneously.
	 */
	spin_lock_irqsave(&cntr_lock, flags);

	cur_spu_evnt_phys_spu_indx = spu_evnt_phys_spu_indx;

	if (++(spu_evnt_phys_spu_indx) == NUM_SPUS_PER_NODE)
		spu_evnt_phys_spu_indx = 0;

	pm_signal[0].sub_unit = spu_evnt_phys_spu_indx;
	pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;
	pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;

	/* switch the SPU being profiled on each node */
	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		node = cbe_cpu_to_node(cpu);
		cur_phys_spu = (node * NUM_SPUS_PER_NODE)
			+ cur_spu_evnt_phys_spu_indx;
		nxt_phys_spu = (node * NUM_SPUS_PER_NODE)
			+ spu_evnt_phys_spu_indx;

		/*
		 * stop both threads for viewing
		 */
		cbe_disable_pm(cpu);
		cbe_disable_pm_interrupts(cpu);

		/* save the counter for the current SPU */
		spu_pm_cnt[cur_phys_spu]
			= cbe_read_ctr(cpu, 0);

		/* Restore the previous count for the next SPU to profile.
		 * NOTE: the hardware counter will not start if the counter
		 * value is at max (0xFFFFFFFF).
		 */
		if (spu_pm_cnt[nxt_phys_spu] >= 0xFFFFFFFF)
			cbe_write_ctr(cpu, 0, 0xFFFFFFF0);
		else
			cbe_write_ctr(cpu, 0, spu_pm_cnt[nxt_phys_spu]);

		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));

		/* Set up the debug bus to measure the one event and
		 * the two events that route the next SPU's PC onto
		 * the debug bus.
		 */
		ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 3);
		if (ret)
			printk(KERN_ERR "%s: pm_rtas_activate_signals failed, "
			       "SPU event swap\n", __func__);

		/* clear the trace buffer, don't want to take PC for
		 * previous SPU
		 */
		cbe_write_pm(cpu, trace_address, 0);

		enable_ctr(cpu, 0, pm_regs.pm07_cntrl);

		/* Enable interrupts on the CPU thread that is starting */
		cbe_enable_pm_interrupts(cpu, hdw_thread,
					 interrupt_mask);
		cbe_enable_pm(cpu);
	}

	spin_unlock_irqrestore(&cntr_lock, flags);

	/* schedule the next swap, 1/25th of a second from now */
	mod_timer(&timer_spu_event_swap, jiffies + HZ / 25);
}

static void start_spu_event_swap(void)
{
	init_timer(&timer_spu_event_swap);
	timer_spu_event_swap.function = spu_evnt_swap;
	timer_spu_event_swap.data = 0UL;
	timer_spu_event_swap.expires = jiffies + HZ / 25;
	add_timer(&timer_spu_event_swap);
}

static int cell_reg_setup_spu_events(struct op_counter_config *ctr,
				     struct op_system_config *sys, int num_ctrs)
{
	int i;

	/* routine is called once for all nodes */
	spu_evnt_phys_spu_indx = 0;

	/*
	 * For all events except PPU CYCLES, each node will need to make
	 * the rtas cbe-perftools call to set up and reset the debug bus.
	 * Make the token lookup call once and store it in the global
	 * variable pm_rtas_token.
	 */
	pm_rtas_token = rtas_token("ibm,cbe-perftools");

	if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
		printk(KERN_ERR
		       "%s: rtas token ibm,cbe-perftools unknown\n",
		       __func__);
		return -EIO;
	}

	/* Set up the pm_control register settings; the settings will be
	 * written per node by the cell_cpu_setup() function.
	 */
	pm_regs.pm_cntrl.trace_buf_ovflw = 1;

	/* Use the occurrence trace mode to have the SPU PC saved
	 * to the trace buffer.  Occurrence data in the trace buffer
	 * is not used.  Bit 2 must be set to store SPU addresses.
	 */
	pm_regs.pm_cntrl.trace_mode = 2;

	pm_regs.pm_cntrl.spu_addr_trace = 0x1;	/* using debug bus
						 * events 2 & 3
						 */

	/* Set up the debug bus event array with the SPU PC routing events.
	 * Note, pm_signal[0] will be filled in by the set_pm_event()
	 * call below.
	 */
	pm_signal[1].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
	pm_signal[1].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_A);
	pm_signal[1].bit = SPU_PROFILE_EVENT_ADDR % 100;
	pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;

	pm_signal[2].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
	pm_signal[2].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_B);
	pm_signal[2].bit = SPU_PROFILE_EVENT_ADDR % 100;
	pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;

	/* Get the enabled event and set up counter 0 to count it.
	 * Only one SPU event is supported at a time.
	 */
	num_counters = 1;
	set_pm_event(0, ctr[0].event, ctr[0].unit_mask);

	reset_value[0] = 0xFFFFFFFF - ctr[0].count;

	/* global, used by cell_cpu_setup */
	ctr_enabled |= 1;

	/* Initialize the count for each SPU to the reset value */
	for (i = 0; i < MAX_NUMNODES * NUM_SPUS_PER_NODE; i++)
		spu_pm_cnt[i] = reset_value[0];

	return 0;
}

static int cell_reg_setup_ppu(struct op_counter_config *ctr,
			      struct op_system_config *sys, int num_ctrs)
{
	/* routine is called once for all nodes */
	int i, j, cpu;

	num_counters = num_ctrs;

	if (unlikely(num_ctrs > NR_PHYS_CTRS)) {
		printk(KERN_ERR
		       "%s: Oprofile, number of specified events " \
		       "exceeds number of physical counters\n",
		       __func__);
		return -EIO;
	}

	set_count_mode(sys->enable_kernel, sys->enable_user);

	/* Set up the thread 0 events */
	for (i = 0; i < num_ctrs; ++i) {
		pmc_cntrl[0][i].evnts = ctr[i].event;
		pmc_cntrl[0][i].masks = ctr[i].unit_mask;
		pmc_cntrl[0][i].enabled = ctr[i].enabled;
		pmc_cntrl[0][i].vcntr = i;

		for_each_possible_cpu(j)
			per_cpu(pmc_values, j)[i] = 0;
	}

	/*
	 * Set up the thread 1 events, mapping each thread 0 event to
	 * the equivalent thread 1 event.
	 */
	for (i = 0; i < num_ctrs; ++i) {
		if ((ctr[i].event >= 2100) && (ctr[i].event <= 2111))
			pmc_cntrl[1][i].evnts = ctr[i].event + 19;
		else if (ctr[i].event == 2203)
			pmc_cntrl[1][i].evnts = ctr[i].event;
		else if ((ctr[i].event >= 2200) && (ctr[i].event <= 2215))
			pmc_cntrl[1][i].evnts = ctr[i].event + 16;
		else
			pmc_cntrl[1][i].evnts = ctr[i].event;

		pmc_cntrl[1][i].masks = ctr[i].unit_mask;
		pmc_cntrl[1][i].enabled = ctr[i].enabled;
		pmc_cntrl[1][i].vcntr = i;
	}

	for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
		input_bus[i] = 0xff;

	/*
	 * Our counters count up, and "count" refers to how many events
	 * occur before the next interrupt, and we interrupt on overflow.
	 * So we calculate the starting value as the overflow value minus
	 * that number of events.
	 */
	for (i = 0; i < num_counters; ++i) {
		if (pmc_cntrl[0][i].enabled) {
			reset_value[i] = 0xFFFFFFFF - ctr[i].count;
			set_pm_event(i,
				     pmc_cntrl[0][i].evnts,
				     pmc_cntrl[0][i].masks);

			/* global, used by cell_cpu_setup */
			ctr_enabled |= (1 << i);
		}
	}

	/* initialize the previous counts for the virtual cntrs */
	for_each_online_cpu(cpu)
		for (i = 0; i < num_counters; ++i) {
			per_cpu(pmc_values, cpu)[i] = reset_value[i];
		}

	return 0;
}
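/* This function is called once for all cpus combined */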
static int cell_reg_setup(struct op_counter_config *ctr,
			  struct op_system_config *sys, int num_ctrs)
{
	int ret = 0;

	spu_cycle_reset = 0;

	/* Initialize the pm_control register defaults; the mode
	 * specific setup routines adjust these as needed.
	 */
	pm_regs.group_control = 0;
	pm_regs.debug_bus_control = 0;
	pm_regs.pm_cntrl.stop_at_max = 1;
	pm_regs.pm_cntrl.trace_mode = 0;
	pm_regs.pm_cntrl.freeze = 1;
	pm_regs.pm_cntrl.trace_buf_ovflw = 0;
	pm_regs.pm_cntrl.spu_addr_trace = 0;

	/*
	 * For all events except PPU CYCLES, each node will need to make
	 * the rtas cbe-perftools call to set up and reset the debug bus.
	 * Make the token lookup call once and store it in the global
	 * variable pm_rtas_token.
	 */
	pm_rtas_token = rtas_token("ibm,cbe-perftools");

	if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
		printk(KERN_ERR
		       "%s: rtas token ibm,cbe-perftools unknown\n",
		       __func__);
		return -EIO;
	}

	if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
		profiling_mode = SPU_PROFILING_CYCLES;
		ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs);
	} else if ((ctr[0].event >= SPU_EVENT_NUM_START) &&
		   (ctr[0].event <= SPU_EVENT_NUM_STOP)) {
		profiling_mode = SPU_PROFILING_EVENTS;
		spu_cycle_reset = ctr[0].count;

		/* For SPU event profiling, we need to set up the
		 * pm_signal array with the events to route the
		 * SPU PC before making the FW call.  Note, only
		 * one SPU event for profiling can be specified
		 * at a time.
		 */
		ret = cell_reg_setup_spu_events(ctr, sys, num_ctrs);
	} else {
		profiling_mode = PPU_PROFILING;
		ret = cell_reg_setup_ppu(ctr, sys, num_ctrs);
	}

	return ret;
}
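/* This function is called once for each cpu */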
static int cell_cpu_setup(struct op_counter_config *cntr)
{
	u32 cpu = smp_processor_id();
	u32 num_enabled = 0;
	int i;
	int ret;

	/* Cycle based SPU profiling does not use the performance
	 * counters.  The trace array is configured to collect
	 * the data.
	 */
	if (profiling_mode == SPU_PROFILING_CYCLES)
		return 0;

	/* There is one performance monitor per processor chip (i.e. node),
	 * so we only need to perform this function once per node.
	 */
	if (cbe_get_hw_thread_id(cpu))
		return 0;

	/* Stop all counters */
	cbe_disable_pm(cpu);
	cbe_disable_pm_interrupts(cpu);

	cbe_write_pm(cpu, pm_start_stop, 0);
	cbe_write_pm(cpu, group_control, pm_regs.group_control);
	cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control);
	write_pm_cntrl(cpu);

	for (i = 0; i < num_counters; ++i) {
		if (ctr_enabled & (1 << i)) {
			pm_signal[num_enabled].cpu = cbe_cpu_to_node(cpu);
			num_enabled++;
		}
	}

	/*
	 * pm_rtas_activate_signals will return -EIO if the FW
	 * call fails.
	 */
	if (profiling_mode == SPU_PROFILING_EVENTS) {
		/* For SPU event profiling, the two extra signals are
		 * the SPU PC routing events set up in cell_reg_setup.
		 */
		ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
					       num_enabled + 2);
		/* Store the PC from the debug bus to the trace buffer
		 * every NUM_INTERVAL_CYC cycles.
		 */
		cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
		return ret;
	} else
		return pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
						num_enabled);
}

#define ENTRIES	303
#define MAXLFSR	0xFFFFFF
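
/* precomputed table of 24 bit LFSR values */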
static int initial_lfsr[] = {
 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026,
 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556,
 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769,
 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893,
 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017,
 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756,
 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558,
 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401,
 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720,
 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042,
 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955,
 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934,
 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783,
 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278,
 8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051,
 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741,
 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972,
 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302,
 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384,
 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469,
 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697,
 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398,
 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140,
 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214,
 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386,
 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087,
 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130,
 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300,
 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475,
 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950,
 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003,
 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
};
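/*
 * The hardware uses an LFSR counting sequence to determine when to capture
 * the SPU PCs.  An LFSR sequence is like a pseudo random number sequence
 * where each number occurs once in the sequence but the sequence is not in
 * numerical order.  The SPU PC capture is done when the LFSR sequence
 * reaches the last value in the sequence.  Hence the user specified value N
 * corresponds to the LFSR number that is N from the end of the sequence.
 *
 * To avoid the time to compute the LFSR, a lookup table is used.  The 24 bit
 * LFSR sequence is broken into four ranges.  The spacing of the precomputed
 * values is adjusted in each range so the error between the user specified
 * number (N) of events between samples and the actual number of events based
 * on the precomputed value will be less than about 6.2%.  Note, if the user
 * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used.
 * This is to prevent the loss of samples because the trace buffer is full.
 *
 *	User specified N	     Step between	    Index in
 *				  precomputed values	  precomputed
 *							     table
 * 0		   to 2^16-1			----		      0
 * 2^16		   to 2^16+2^19-1		2^12	       1 to 128
 * 2^16+2^19	   to 2^16+2^19+2^22-1		2^15	     129 to 256
 * 2^16+2^19+2^22  to 2^24-1			2^18	     257 to 302
 *
 * For example, the LFSR values in the second range are computed for 2^16,
 * 2^16+2^12, ... , 2^19-2^12, 2^19 and stored in the table at indices
 * 1, 2, ..., 127, 128.
 *
 * The 24 bit LFSR value for the nth number in the sequence can be
 * calculated offline with code along the following lines:
 *
 * #define size 24
 * int calculate_lfsr(int n)
 * {
 *	int i;
 *	unsigned int newlfsr0;
 *	unsigned int lfsr = MAXLFSR;
 *	unsigned int howmany = n;
 *
 *	for (i = 2; i < howmany + 2; i++) {
 *		newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^
 *			    ((lfsr >> (size - 1 - 1)) & 1) ^
 *			    (((lfsr >> (size - 1 - 6)) & 1) ^
 *			     ((lfsr >> (size - 1 - 23)) & 1)));
 *		lfsr >>= 1;
 *		lfsr = lfsr | (newlfsr0 << (size - 1));
 *	}
 *	return lfsr;
 * }
 */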
#define V2_16  (0x1 << 16)
#define V2_19  (0x1 << 19)
#define V2_22  (0x1 << 22)

static int calculate_lfsr(int n)
{
	/*
	 * The ranges and steps are in powers of 2 so the calculations
	 * can be done using shifts rather than divide.
	 */
	int index;

	if ((n >> 16) == 0)
		index = 0;
	else if (((n - V2_16) >> 19) == 0)
		index = ((n - V2_16) >> 12) + 1;
	else if (((n - V2_16 - V2_19) >> 22) == 0)
		index = ((n - V2_16 - V2_19) >> 15) + 1 + 128;
	else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
		index = ((n - V2_16 - V2_19 - V2_22) >> 18) + 1 + 256;
	else
		index = ENTRIES - 1;

	/* make sure index is valid */
	if ((index >= ENTRIES) || (index < 0))
		index = ENTRIES - 1;

	return initial_lfsr[index];
}

static int pm_rtas_activate_spu_profiling(u32 node)
{
	int ret, i;
	struct pm_signal pm_signal_local[NUM_SPUS_PER_NODE];

	/*
	 * Set up the rtas call to configure the debug bus to
	 * route the SPU PCs.  Setup the pm_signal for each SPU.
	 */
	for (i = 0; i < ARRAY_SIZE(pm_signal_local); i++) {
		pm_signal_local[i].cpu = node;
		pm_signal_local[i].signal_group = 41;
		/* spu i on word (i/2) */
		pm_signal_local[i].bus_word = 1 << (i / 2);
		/* spu i */
		pm_signal_local[i].sub_unit = i;
		pm_signal_local[i].bit = 63;
	}

	ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE,
				     PASSTHRU_ENABLE, pm_signal_local,
				     (ARRAY_SIZE(pm_signal_local)
				      * sizeof(struct pm_signal)));

	if (unlikely(ret)) {
		printk(KERN_WARNING "%s: rtas returned: %d\n",
		       __func__, ret);
		return -EIO;
	}

	return 0;
}

#ifdef CONFIG_CPU_FREQ
static int
oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data)
{
	int ret = 0;
	struct cpufreq_freqs *frq = data;

	if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) ||
	    (val == CPUFREQ_POSTCHANGE && frq->old > frq->new))
		set_spu_profiling_frequency(frq->new, spu_cycle_reset);
	return ret;
}

static struct notifier_block cpu_freq_notifier_block = {
	.notifier_call = oprof_cpufreq_notify
};
#endif
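/*
 * Note the generic OProfile stop calls do not support returning
 * an error on stop.  Hence, we do not return an error if the FW
 * calls fail on stop.  Failure to reset the debug bus is not an issue.
 * Failure to disable the SPU profiling is not an issue.  The FW calls
 * to enable the performance counters and debug bus will work even if
 * the hardware was not cleanly reset.
 */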
static void cell_global_stop_spu_cycles(void)
{
	int subfunc, rtn_value;
	unsigned int lfsr_value;
	int cpu;

	oprofile_running = 0;
	smp_wmb();

#ifdef CONFIG_CPU_FREQ
	cpufreq_unregister_notifier(&cpu_freq_notifier_block,
				    CPUFREQ_TRANSITION_NOTIFIER);
#endif

	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		subfunc = 3;	/*
				 * 2 - activate SPU tracing,
				 * 3 - deactivate
				 */
		lfsr_value = 0x8f100000;

		rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
				      subfunc, cbe_cpu_to_node(cpu),
				      lfsr_value);

		if (unlikely(rtn_value != 0)) {
			printk(KERN_ERR
			       "%s: rtas call ibm,cbe-spu-perftools " \
			       "failed, return = %d\n",
			       __func__, rtn_value);
		}

		/* Deactivate the signals */
		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
	}

	stop_spu_profiling_cycles();
}

static void cell_global_stop_spu_events(void)
{
	int cpu;

	oprofile_running = 0;

	stop_spu_profiling_events();
	smp_wmb();

	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		cbe_sync_irq(cbe_cpu_to_node(cpu));
		/* Stop the counters */
		cbe_disable_pm(cpu);
		cbe_write_pm07_control(cpu, 0, 0);

		/* Deactivate the signals */
		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));

		/* Deactivate interrupts */
		cbe_disable_pm_interrupts(cpu);
	}
	del_timer_sync(&timer_spu_event_swap);
}

static void cell_global_stop_ppu(void)
{
	int cpu;

	/*
	 * This routine will be called once for the system.
	 * There is one performance monitor per node, so we
	 * only need to perform this function once per node.
	 */
	del_timer_sync(&timer_virt_cntr);
	oprofile_running = 0;
	smp_wmb();

	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		cbe_sync_irq(cbe_cpu_to_node(cpu));
		/* Stop the counters */
		cbe_disable_pm(cpu);

		/* Deactivate the signals */
		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));

		/* Deactivate interrupts */
		cbe_disable_pm_interrupts(cpu);
	}
}

static void cell_global_stop(void)
{
	if (profiling_mode == PPU_PROFILING)
		cell_global_stop_ppu();
	else if (profiling_mode == SPU_PROFILING_EVENTS)
		cell_global_stop_spu_events();
	else
		cell_global_stop_spu_cycles();
}

static int cell_global_start_spu_cycles(struct op_counter_config *ctr)
{
	int subfunc;
	unsigned int lfsr_value;
	int cpu;
	int ret;
	int rtas_error;
	unsigned int cpu_khzfreq = 0;

	/* The SPU profiling uses time-based profiling based on
	 * cpu frequency, so if configured with the CPU_FREQ
	 * option, we should detect frequency changes and react
	 * accordingly.
	 */
#ifdef CONFIG_CPU_FREQ
	ret = cpufreq_register_notifier(&cpu_freq_notifier_block,
					CPUFREQ_TRANSITION_NOTIFIER);
	if (ret < 0)
		/* this is not a fatal error */
		printk(KERN_ERR "CPU freq change registration failed: %d\n",
		       ret);
	else
		cpu_khzfreq = cpufreq_quick_get(smp_processor_id());
#endif

	set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset);

	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		/*
		 * Setup SPU cycle-based profiling.
		 * Set perf_mon_control bit 0 to a zero before
		 * enabling spu collection hardware.
		 */
		cbe_write_pm(cpu, pm_control, 0);

		if (spu_cycle_reset > MAX_SPU_COUNT)
			/* use largest possible value */
			lfsr_value = calculate_lfsr(MAX_SPU_COUNT - 1);
		else
			lfsr_value = calculate_lfsr(spu_cycle_reset);

		/* must use a non zero value.  Zero disables data collection. */
		if (lfsr_value == 0)
			lfsr_value = calculate_lfsr(1);

		lfsr_value = lfsr_value << 8;	/* shift lfsr to correct
						 * register location
						 */

		/* debug bus setup */
		ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu));

		if (unlikely(ret)) {
			rtas_error = ret;
			goto out;
		}

		subfunc = 2;	/* 2 - activate SPU tracing, 3 - deactivate */

		/* start profiling */
		ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
				cbe_cpu_to_node(cpu), lfsr_value);

		if (unlikely(ret != 0)) {
			printk(KERN_ERR
			       "%s: rtas call ibm,cbe-spu-perftools failed, " \
			       "return = %d\n", __func__, ret);
			rtas_error = -EIO;
			goto out;
		}
	}

	rtas_error = start_spu_profiling_cycles(spu_cycle_reset);
	if (rtas_error)
		goto out_stop;

	oprofile_running = 1;
	return 0;

out_stop:
	cell_global_stop_spu_cycles();	/* clean up the PMU/debug bus */
out:
	return rtas_error;
}

static int cell_global_start_spu_events(struct op_counter_config *ctr)
{
	int cpu;
	u32 interrupt_mask = 0;
	int rtn = 0;

	hdw_thread = 0;

	/* SPU event profiling uses the performance counters to generate
	 * an interrupt.  The hardware is set up to store the SPU program
	 * counter into the trace array.  The occurrence mode is used to
	 * enable storing data to the trace buffer.  The bits are set
	 * to send/store the SPU address in the trace buffer.  The debug
	 * bus must be set up to route the SPU program counter onto the
	 * debug bus.  The occurrence data in the trace buffer is not used.
	 */

	/* This routine gets called once for the system.
	 * There is one performance monitor per node, so we
	 * only need to perform this function once per node.
	 */
	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		/*
		 * Setup SPU event-based profiling.
		 * Set perf_mon_control bit 0 to a zero before
		 * enabling spu collection hardware.
		 *
		 * Only support one SPU event on one SPU per node.
		 */
		if (ctr_enabled & 1) {
			cbe_write_ctr(cpu, 0, reset_value[0]);
			enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
			interrupt_mask |=
				CBE_PM_CTR_OVERFLOW_INTR(0);
		} else {
			/* Disable counter */
			cbe_write_pm07_control(cpu, 0, 0);
		}

		cbe_get_and_clear_pm_interrupts(cpu);
		cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask);
		cbe_enable_pm(cpu);

		/* clear the trace buffer */
		cbe_write_pm(cpu, trace_address, 0);
	}

	/* Start the timer that rotates the event collection across the
	 * SPUs of each node, then start collecting the SPU profile data.
	 */
	start_spu_event_swap();
	start_spu_profiling_events();
	oprofile_running = 1;
	smp_wmb();

	return rtn;
}

static int cell_global_start_ppu(struct op_counter_config *ctr)
{
	u32 cpu, i;
	u32 interrupt_mask = 0;

	/* This routine gets called once for the system.
	 * There is one performance monitor per node, so we
	 * only need to perform this function once per node.
	 */
	for_each_online_cpu(cpu) {
		if (cbe_get_hw_thread_id(cpu))
			continue;

		interrupt_mask = 0;

		for (i = 0; i < num_counters; ++i) {
			if (ctr_enabled & (1 << i)) {
				cbe_write_ctr(cpu, i, reset_value[i]);
				enable_ctr(cpu, i, pm_regs.pm07_cntrl);
				interrupt_mask |= CBE_PM_CTR_OVERFLOW_INTR(i);
			} else {
				/* Disable counter */
				cbe_write_pm07_control(cpu, i, 0);
			}
		}

		cbe_get_and_clear_pm_interrupts(cpu);
		cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask);
		cbe_enable_pm(cpu);
	}

	virt_cntr_inter_mask = interrupt_mask;
	oprofile_running = 1;
	smp_wmb();

	/*
	 * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
	 * executed which manipulates the PMU.  We start the "virtual counter"
	 * here so that we do not need to synchronize access to the PMU in
	 * the above for-loop.
	 */
	start_virt_cntrs();

	return 0;
}

static int cell_global_start(struct op_counter_config *ctr)
{
	if (profiling_mode == SPU_PROFILING_CYCLES)
		return cell_global_start_spu_cycles(ctr);
	else if (profiling_mode == SPU_PROFILING_EVENTS)
		return cell_global_start_spu_events(ctr);
	else
		return cell_global_start_ppu(ctr);
}
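/*
 * SPU event profiling interrupt handler.
 *
 * The SPU program counter is routed onto the debug bus and stored in the
 * hardware trace buffer while the event in counter 0 counts down.  When
 * counter 0 overflows, this handler drains the trace buffer, keeps the
 * most recent SPU program counter value as the sample for the SPU
 * currently being profiled, hands it to the SPU profiling code, and then
 * restarts the counter and the trace collection.
 */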
static void cell_handle_interrupt_spu(struct pt_regs *regs,
				      struct op_counter_config *ctr)
{
	u32 cpu;
	u64 trace_entry;
	u32 interrupt_mask;
	u64 trace_buffer[2];
	u64 last_trace_buffer;
	u32 sample;
	u32 trace_addr;
	unsigned long sample_array_lock_flags;
	int spu_num;
	unsigned long flags;

	/* Make sure the interrupt handler and the virt counter routine
	 * are not both manipulating the counters on the same node.
	 */
	cpu = smp_processor_id();
	spin_lock_irqsave(&cntr_lock, flags);

	cbe_disable_pm(cpu);

	interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);

	/* sentinel values, overwritten below if a sample is taken */
	sample = 0xABCDEF;
	trace_entry = 0xfedcba;
	last_trace_buffer = 0xdeadbeef;

	if ((oprofile_running == 1) && (interrupt_mask != 0)) {
		/* disable writes to the trace buffer */
		cbe_write_pm(cpu, pm_interval, 0);

		if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(0))
		    && ctr[0].enabled)
			/* The SPU PC values will be read
			 * from the trace buffer, reset counter.
			 */
			cbe_write_ctr(cpu, 0, reset_value[0]);

		trace_addr = cbe_read_pm(cpu, trace_address);

		while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
			/* There is data in the trace buffer to process;
			 * drain the entire buffer, keeping only the most
			 * recent entry.
			 */
			cbe_read_trace_buffer(cpu, trace_buffer);
			trace_addr = cbe_read_pm(cpu, trace_address);
		}

		/* SPU Address 16 bit count format for 128 bit
		 * HW trace buffer is used for the SPU PC storage
		 *    HDR bits          0:15
		 *    SPU Addr 0 bits   16:31
		 *    SPU Addr 1 bits   32:47
		 *    unused bits       48:127
		 *
		 * HDR: bit4 = 1 SPU Address 0 valid
		 * HDR: bit5 = 1 SPU Address 1 valid
		 *  - unfortunately, the valid bits don't seem to work
		 *
		 * Note trace_buffer[0] holds bits 0:63 of the 128
		 * bit trace buffer.
		 */
		trace_entry = trace_buffer[0]
			& 0x00000000FFFF0000;

		/* Only the top 16 of the 18 bit SPU PC address
		 * are stored in the trace buffer, hence shift right
		 * by 16 - 2 bits.
		 */
		sample = trace_entry >> 14;
		last_trace_buffer = trace_buffer[0];

		spu_num = spu_evnt_phys_spu_indx
			+ (cbe_cpu_to_node(cpu) * NUM_SPUS_PER_NODE);

		/* Process the sample for the SPU that is currently
		 * being profiled.
		 */
		spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
				  sample_array_lock_flags);
		spu_sync_buffer(spu_num, &sample, 1);
		spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
				       sample_array_lock_flags);

		smp_wmb();	/* insure spu event buffer updates are written */

		/* The counters were frozen by the interrupt.
		 * Reenable the interrupt and restart the counters.
		 */
		cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
		cbe_enable_pm_interrupts(cpu, hdw_thread,
					 virt_cntr_inter_mask);

		/* clear the trace buffer */
		cbe_write_pm(cpu, trace_address, 0);
		cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);

		/* The writes to the various performance counters only
		 * write to a latch.  The new values (interrupt setting
		 * bits, reset counter value etc.) are not copied to the
		 * actual registers until the performance monitor is
		 * enabled.  In order to get this to work as desired, the
		 * performance monitor needs to be disabled while writing
		 * to the latches.  This is a HW design issue.
		 */
		write_pm_cntrl(cpu);
		cbe_enable_pm(cpu);
	}
	spin_unlock_irqrestore(&cntr_lock, flags);
}

static void cell_handle_interrupt_ppu(struct pt_regs *regs,
				      struct op_counter_config *ctr)
{
	u32 cpu;
	u64 pc;
	int is_kernel;
	unsigned long flags = 0;
	u32 interrupt_mask;
	int i;

	cpu = smp_processor_id();

	/*
	 * Need to make sure the interrupt handler and the virt counter
	 * routine are not both playing with the counters on the same
	 * node.
	 */
	spin_lock_irqsave(&cntr_lock, flags);

	/*
	 * The counters were frozen by the hardware when the interrupt
	 * occurred.  Disable the performance monitor so it stays stopped
	 * while the counters are reloaded below.
	 */
	cbe_disable_pm(cpu);

	interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);

	/*
	 * If the interrupt mask has been cleared, then the virt cntr
	 * has cleared the interrupt.  When the thread that generated
	 * the interrupt is restored, the data count will be restored to
	 * 0xfffffff0 to cause the interrupt to be regenerated.
	 */
	if ((oprofile_running == 1) && (interrupt_mask != 0)) {
		pc = regs->nip;
		is_kernel = is_kernel_addr(pc);

		for (i = 0; i < num_counters; ++i) {
			if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(i))
			    && ctr[i].enabled) {
				oprofile_add_ext_sample(pc, regs, i, is_kernel);
				cbe_write_ctr(cpu, i, reset_value[i]);
			}
		}

		/*
		 * The counters were frozen by the interrupt.
		 * Reenable the interrupt and restart the counters.
		 * If there was a race between the interrupt handler and
		 * the virtual counter routine, the virtual counter
		 * routine may have cleared the interrupts.  Hence
		 * virt_cntr_inter_mask is used to re-enable only the
		 * interrupts that are currently enabled.
		 */
		cbe_enable_pm_interrupts(cpu, hdw_thread,
					 virt_cntr_inter_mask);

		/*
		 * The writes to the various performance counters only
		 * write to a latch.  The new values (interrupt setting
		 * bits, reset counter value etc.) are not copied to the
		 * actual registers until the performance monitor is
		 * enabled.  In order to get this to work as desired, the
		 * performance monitor needs to be disabled while writing
		 * to the latches.  This is a HW design issue.
		 */
		cbe_enable_pm(cpu);
	}
	spin_unlock_irqrestore(&cntr_lock, flags);
}

static void cell_handle_interrupt(struct pt_regs *regs,
				  struct op_counter_config *ctr)
{
	if (profiling_mode == PPU_PROFILING)
		cell_handle_interrupt_ppu(regs, ctr);
	else
		cell_handle_interrupt_spu(regs, ctr);
}
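/*
 * This function is called from the generic OProfile
 * driver.  When profiling PPUs, we need to do the
 * generic sync start; otherwise, do spu_sync_start.
 */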
static int cell_sync_start(void)
{
	if ((profiling_mode == SPU_PROFILING_CYCLES) ||
	    (profiling_mode == SPU_PROFILING_EVENTS))
		return spu_sync_start();
	else
		return DO_GENERIC_SYNC;
}

static int cell_sync_stop(void)
{
	if ((profiling_mode == SPU_PROFILING_CYCLES) ||
	    (profiling_mode == SPU_PROFILING_EVENTS))
		return spu_sync_stop();
	else
		return 1;
}

struct op_powerpc_model op_model_cell = {
	.reg_setup = cell_reg_setup,
	.cpu_setup = cell_cpu_setup,
	.global_start = cell_global_start,
	.global_stop = cell_global_stop,
	.sync_start = cell_sync_start,
	.sync_stop = cell_sync_stop,
	.handle_interrupt = cell_handle_interrupt,
};