1
2
3
4
5
6#include <linux/module.h>
7#include <linux/kernel.h>
8#include <linux/sched.h>
9#include <linux/mm.h>
10#include <linux/pagemap.h>
11#include <linux/threads.h>
12#include <linux/smp.h>
13#include <linux/interrupt.h>
14#include <linux/kernel_stat.h>
15#include <linux/delay.h>
16#include <linux/init.h>
17#include <linux/spinlock.h>
18#include <linux/fs.h>
19#include <linux/seq_file.h>
20#include <linux/cache.h>
21#include <linux/jiffies.h>
22#include <linux/profile.h>
23#include <linux/bootmem.h>
24#include <linux/vmalloc.h>
25#include <linux/ftrace.h>
26#include <linux/cpu.h>
27#include <linux/slab.h>
28
29#include <asm/head.h>
30#include <asm/ptrace.h>
31#include <asm/atomic.h>
32#include <asm/tlbflush.h>
33#include <asm/mmu_context.h>
34#include <asm/cpudata.h>
35#include <asm/hvtramp.h>
36#include <asm/io.h>
37#include <asm/timer.h>
38
39#include <asm/irq.h>
40#include <asm/irq_regs.h>
41#include <asm/page.h>
42#include <asm/pgtable.h>
43#include <asm/oplib.h>
44#include <asm/uaccess.h>
45#include <asm/starfire.h>
46#include <asm/tlb.h>
47#include <asm/sections.h>
48#include <asm/prom.h>
49#include <asm/mdesc.h>
50#include <asm/ldc.h>
51#include <asm/hypervisor.h>
52#include <asm/pcr.h>
53
54#include "cpumap.h"
55
/* Defined here but never written in this file; presumably a flag set
 * elsewhere when multi-core processors are detected -- TODO confirm.
 */
56int sparc64_multi_core __read_mostly;
57
/* cpu_sibling_map (per-cpu) and cpu_core_map (indexed by cpu number) both
 * start out empty.  smp_fill_in_sib_core_maps() fills them with the
 * present cpus sharing the same proc_id resp. core_id, and
 * __cpu_disable() removes a cpu from them again.
 */
58DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
59cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
60 { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
61
62EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
63EXPORT_SYMBOL(cpu_core_map);
64
/* A cpu's bit is set by __cpu_up() once the cpu has called in; the new
 * cpu spins in smp_callin() until it sees its bit.  cpu_play_dead()
 * clears the bit and __cpu_die() polls for that.
 */
65static cpumask_t smp_commenced_mask;
66
67void smp_info(struct seq_file *m)
68{
69 int i;
70
71 seq_printf(m, "State:\n");
72 for_each_online_cpu(i)
73 seq_printf(m, "CPU%d:\t\tonline\n", i);
74}
75
76void smp_bogo(struct seq_file *m)
77{
78 int i;
79
80 for_each_online_cpu(i)
81 seq_printf(m,
82 "Cpu%dClkTck\t: %016lx\n",
83 i, cpu_data(i).clock_tick);
84}
85
86extern void setup_sparc64_timer(void);
87
/* Boot handshake flag: smp_boot_one_cpu() clears it before starting a
 * cpu and polls it afterwards; the started cpu sets it to 1 in
 * smp_callin().
 */
88static volatile unsigned long callin_flag = 0;
89
90void __cpuinit smp_callin(void)
91{
92 int cpuid = hard_smp_processor_id();
93
94 __local_per_cpu_offset = __per_cpu_offset(cpuid);
95
96 if (tlb_type == hypervisor)
97 sun4v_ktsb_register();
98
99 __flush_tlb_all();
100
101 setup_sparc64_timer();
102
103 if (cheetah_pcache_forced_on)
104 cheetah_enable_pcache();
105
106 local_irq_enable();
107
108 callin_flag = 1;
109 __asm__ __volatile__("membar #Sync\n\t"
110 "flush %%g6" : : : "memory");
111
112
113
114
115 current_thread_info()->new_child = 0;
116
117
118 atomic_inc(&init_mm.mm_count);
119 current->active_mm = &init_mm;
120
121
122 notify_cpu_starting(cpuid);
123
124 while (!cpu_isset(cpuid, smp_commenced_mask))
125 rmb();
126
127 ipi_call_lock_irq();
128 cpu_set(cpuid, cpu_online_map);
129 ipi_call_unlock_irq();
130
131
132 preempt_disable();
133}
134
/*
 * Report that this cpu unexpectedly returned from cpu_idle and panic.
 */
void cpu_panic(void)
{
	const int self = smp_processor_id();

	printk("CPU[%d]: Returns from cpu_idle!\n", self);
	panic("SMP bolixed\n");
}
140
141
142
143
144
145
146
147
/* Indices into go[] used for the tick synchronization handshake.  The
 * SLAVE slot sits SMP_CACHE_BYTES after the MASTER slot, presumably so
 * that the two flags do not share a cache line.
 */
148#define MASTER 0
149#define SLAVE (SMP_CACHE_BYTES/sizeof(unsigned long))
150
/* The client performs NUM_ROUNDS adjustment rounds; each round takes
 * NUM_ITERS samples in get_delta().  The master answers
 * NUM_ROUNDS*NUM_ITERS requests in total.
 */
151#define NUM_ROUNDS 64
152#define NUM_ITERS 5
153
/* Held by the master (smp_synchronize_one_tick) while it serves samples. */
154static DEFINE_SPINLOCK(itc_sync_lock);
/* go[MASTER]: request flag written by the client, cleared by the master.
 * go[SLAVE]: master's tick value written as the reply, cleared by the
 * client.
 */
155static unsigned long go[SLAVE + 1];
156
/* Set to 1 to record and print every round in
 * smp_synchronize_tick_client().
 */
157#define DEBUG_TICK_SYNC 0
158
159static inline long get_delta (long *rt, long *master)
160{
161 unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
162 unsigned long tcenter, t0, t1, tm;
163 unsigned long i;
164
165 for (i = 0; i < NUM_ITERS; i++) {
166 t0 = tick_ops->get_tick();
167 go[MASTER] = 1;
168 membar_safe("#StoreLoad");
169 while (!(tm = go[SLAVE]))
170 rmb();
171 go[SLAVE] = 0;
172 wmb();
173 t1 = tick_ops->get_tick();
174
175 if (t1 - t0 < best_t1 - best_t0)
176 best_t0 = t0, best_t1 = t1, best_tm = tm;
177 }
178
179 *rt = best_t1 - best_t0;
180 *master = best_tm - best_t0;
181
182
183 tcenter = (best_t0/2 + best_t1/2);
184 if (best_t0 % 2 + best_t1 % 2 == 2)
185 tcenter++;
186 return tcenter - best_tm;
187}
188
189void smp_synchronize_tick_client(void)
190{
191 long i, delta, adj, adjust_latency = 0, done = 0;
192 unsigned long flags, rt, master_time_stamp, bound;
193#if DEBUG_TICK_SYNC
194 struct {
195 long rt;
196 long master;
197 long diff;
198 long lat;
199 } t[NUM_ROUNDS];
200#endif
201
202 go[MASTER] = 1;
203
204 while (go[MASTER])
205 rmb();
206
207 local_irq_save(flags);
208 {
209 for (i = 0; i < NUM_ROUNDS; i++) {
210 delta = get_delta(&rt, &master_time_stamp);
211 if (delta == 0) {
212 done = 1;
213 bound = rt;
214 }
215
216 if (!done) {
217 if (i > 0) {
218 adjust_latency += -delta;
219 adj = -delta + adjust_latency/4;
220 } else
221 adj = -delta;
222
223 tick_ops->add_tick(adj);
224 }
225#if DEBUG_TICK_SYNC
226 t[i].rt = rt;
227 t[i].master = master_time_stamp;
228 t[i].diff = delta;
229 t[i].lat = adjust_latency/4;
230#endif
231 }
232 }
233 local_irq_restore(flags);
234
235#if DEBUG_TICK_SYNC
236 for (i = 0; i < NUM_ROUNDS; i++)
237 printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
238 t[i].rt, t[i].master, t[i].diff, t[i].lat);
239#endif
240
241 printk(KERN_INFO "CPU %d: synchronized TICK with master CPU "
242 "(last diff %ld cycles, maxerr %lu cycles)\n",
243 smp_processor_id(), delta, rt);
244}
245
246static void smp_start_sync_tick_client(int cpu);
247
248static void smp_synchronize_one_tick(int cpu)
249{
250 unsigned long flags, i;
251
252 go[MASTER] = 0;
253
254 smp_start_sync_tick_client(cpu);
255
256
257 while (!go[MASTER])
258 rmb();
259
260
261 go[MASTER] = 0;
262 membar_safe("#StoreLoad");
263
264 spin_lock_irqsave(&itc_sync_lock, flags);
265 {
266 for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
267 while (!go[MASTER])
268 rmb();
269 go[MASTER] = 0;
270 wmb();
271 go[SLAVE] = tick_ops->get_tick();
272 membar_safe("#StoreLoad");
273 }
274 }
275 spin_unlock_irqrestore(&itc_sync_lock, flags);
276}
277
278#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
279
280static unsigned long kimage_addr_to_ra(void *p)
281{
282 unsigned long val = (unsigned long) p;
283
284 return kern_base + (val - KERNBASE);
285}
286
287static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg, void **descrp)
288{
289 extern unsigned long sparc64_ttable_tl0;
290 extern unsigned long kern_locked_tte_data;
291 struct hvtramp_descr *hdesc;
292 unsigned long trampoline_ra;
293 struct trap_per_cpu *tb;
294 u64 tte_vaddr, tte_data;
295 unsigned long hv_err;
296 int i;
297
298 hdesc = kzalloc(sizeof(*hdesc) +
299 (sizeof(struct hvtramp_mapping) *
300 num_kernel_image_mappings - 1),
301 GFP_KERNEL);
302 if (!hdesc) {
303 printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
304 "hvtramp_descr.\n");
305 return;
306 }
307 *descrp = hdesc;
308
309 hdesc->cpu = cpu;
310 hdesc->num_mappings = num_kernel_image_mappings;
311
312 tb = &trap_block[cpu];
313
314 hdesc->fault_info_va = (unsigned long) &tb->fault_info;
315 hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
316
317 hdesc->thread_reg = thread_reg;
318
319 tte_vaddr = (unsigned long) KERNBASE;
320 tte_data = kern_locked_tte_data;
321
322 for (i = 0; i < hdesc->num_mappings; i++) {
323 hdesc->maps[i].vaddr = tte_vaddr;
324 hdesc->maps[i].tte = tte_data;
325 tte_vaddr += 0x400000;
326 tte_data += 0x400000;
327 }
328
329 trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
330
331 hv_err = sun4v_cpu_start(cpu, trampoline_ra,
332 kimage_addr_to_ra(&sparc64_ttable_tl0),
333 __pa(hdesc));
334 if (hv_err)
335 printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
336 "gives error %lu\n", hv_err);
337}
338#endif
339
340extern unsigned long sparc64_cpu_startup;
341
342
343
344
345
/* thread_info of the idle task for the cpu currently being booted.
 * smp_boot_one_cpu() sets it before starting the cpu (its address is
 * passed to the PROM as the start cookie) and resets it to NULL
 * afterwards.
 */
346static struct thread_info *cpu_new_thread = NULL;
347
348static int __cpuinit smp_boot_one_cpu(unsigned int cpu)
349{
350 unsigned long entry =
351 (unsigned long)(&sparc64_cpu_startup);
352 unsigned long cookie =
353 (unsigned long)(&cpu_new_thread);
354 struct task_struct *p;
355 void *descr = NULL;
356 int timeout, ret;
357
358 p = fork_idle(cpu);
359 if (IS_ERR(p))
360 return PTR_ERR(p);
361 callin_flag = 0;
362 cpu_new_thread = task_thread_info(p);
363
364 if (tlb_type == hypervisor) {
365#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
366 if (ldom_domaining_enabled)
367 ldom_startcpu_cpuid(cpu,
368 (unsigned long) cpu_new_thread,
369 &descr);
370 else
371#endif
372 prom_startcpu_cpuid(cpu, entry, cookie);
373 } else {
374 struct device_node *dp = of_find_node_by_cpuid(cpu);
375
376 prom_startcpu(dp->phandle, entry, cookie);
377 }
378
379 for (timeout = 0; timeout < 50000; timeout++) {
380 if (callin_flag)
381 break;
382 udelay(100);
383 }
384
385 if (callin_flag) {
386 ret = 0;
387 } else {
388 printk("Processor %d is stuck.\n", cpu);
389 ret = -ENODEV;
390 }
391 cpu_new_thread = NULL;
392
393 kfree(descr);
394
395 return ret;
396}
397
/*
 * Send the three-word mondo (data0, data1, data2) to a single @cpu on
 * Spitfire-class hardware and poll the dispatch status until it
 * completes.
 *
 * @pstate is the caller's saved %pstate value; it is written back
 * before every return, before the "stuckage" message and before each
 * retry delay.
 */
398static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
399{
400 u64 result, target;
401 int stuck, tmp;
402
403 if (this_is_starfire) {
404
 /* On Starfire the cpu number bits are rearranged before being
  * used as the dispatch target.
  */
405 cpu = (((cpu & 0x3c) << 1) |
406 ((cpu & 0x40) >> 4) |
407 (cpu & 0x3));
408 }
409
410 target = (cpu << 14) | 0x70;
411again:
412
413
414
415
416
417
418
 /* The asm below:
  *  - writes (pstate ^ PSTATE_IE) to %pstate (wrpr XORs its operands;
  *    presumably this turns interrupts off -- confirm),
  *  - stores data0/data1/data2 at 0x40/0x50/0x60 in ASI_INTR_W space
  *    (tmp starts at 0x40 and is advanced by 0x10 in between),
  *  - stores to 'target', which presumably launches the dispatch,
  *  - performs a load from address 0x20 with ASI 0x7f into %g0.
  */
419 tmp = 0x40;
420 __asm__ __volatile__(
421 "wrpr %1, %2, %%pstate\n\t"
422 "stxa %4, [%0] %3\n\t"
423 "stxa %5, [%0+%8] %3\n\t"
424 "add %0, %8, %0\n\t"
425 "stxa %6, [%0+%8] %3\n\t"
426 "membar #Sync\n\t"
427 "stxa %%g0, [%7] %3\n\t"
428 "membar #Sync\n\t"
429 "mov 0x20, %%g1\n\t"
430 "ldxa [%%g1] 0x7f, %%g0\n\t"
431 "membar #Sync"
432 : "=r" (tmp)
433 : "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
434 "r" (data0), "r" (data1), "r" (data2), "r" (target),
435 "r" (0x10), "0" (tmp)
436 : "g1");
437
438
 /* Poll the dispatch status at most 100000 times.  A status of zero
  * means done.  The loop keeps polling while bit 0 is set.
  */
439 stuck = 100000;
440 do {
441 __asm__ __volatile__("ldxa [%%g0] %1, %0"
442 : "=r" (result)
443 : "i" (ASI_INTR_DISPATCH_STAT));
444 if (result == 0) {
445 __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
446 : : "r" (pstate));
447 return;
448 }
449 stuck -= 1;
450 if (stuck == 0)
451 break;
452 } while (result & 0x1);
453 __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
454 : : "r" (pstate));
 /* Either the poll budget ran out (give up with a message), or bit 0
  * cleared while the status was still non-zero; in that case wait 2us
  * and resend.
  */
455 if (stuck == 0) {
456 printk("CPU[%d]: mondo stuckage result[%016llx]\n",
457 smp_processor_id(), result);
458 } else {
459 udelay(2);
460 goto again;
461 }
462}
463
464static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
465{
466 u64 *mondo, data0, data1, data2;
467 u16 *cpu_list;
468 u64 pstate;
469 int i;
470
471 __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
472 cpu_list = __va(tb->cpu_list_pa);
473 mondo = __va(tb->cpu_mondo_block_pa);
474 data0 = mondo[0];
475 data1 = mondo[1];
476 data2 = mondo[2];
477 for (i = 0; i < cnt; i++)
478 spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
479}
480
481
482
483
484
/*
 * Cheetah delivery backend: send the mondo stored in @tb's mondo block
 * to the first @cnt entries of @tb's cpu list.
 *
 * Entries equal to 0xffff are skipped.  Entries that have been handled
 * are overwritten with 0xffff, so the cpu list is consumed by this
 * function.  At most 32 targets are dispatched per pass; the remainder
 * is handled by jumping back to 'retry'.
 */
485static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
486{
487 int nack_busy_id, is_jbus, need_more;
488 u64 *mondo, pstate, ver, busy_mask;
489 u16 *cpu_list;
490
491 cpu_list = __va(tb->cpu_list_pa);
492 mondo = __va(tb->cpu_mondo_block_pa);
493
494
495
496
497
 /* Jalapeno and Serrano parts (identified by the upper 32 bits of
  * %ver) index their busy/nack status bits by cpu number instead of
  * by dispatch slot; see the is_jbus branches below.
  */
498 __asm__ ("rdpr %%ver, %0" : "=r" (ver));
499 is_jbus = ((ver >> 32) == __JALAPENO_ID ||
500 (ver >> 32) == __SERRANO_ID);
501
502 __asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
503
504retry:
505 need_more = 0;
 /* Write (pstate ^ PSTATE_IE) to %pstate; presumably this disables
  * interrupts for the dispatch sequence -- confirm.
  */
506 __asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
507 : : "r" (pstate), "i" (PSTATE_IE));
508
509
 /* Load the three mondo words at 0x40/0x50/0x60 in ASI_INTR_W space. */
510 __asm__ __volatile__("stxa %0, [%3] %6\n\t"
511 "stxa %1, [%4] %6\n\t"
512 "stxa %2, [%5] %6\n\t"
513 "membar #Sync\n\t"
514 :
515 : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
516 "r" (0x40), "r" (0x50), "r" (0x60),
517 "i" (ASI_INTR_W));
518
519 nack_busy_id = 0;
520 busy_mask = 0;
521 {
522 int i;
523
 /* Dispatch to every live entry, up to 32 per pass, collecting
  * the busy bit of each dispatched target in busy_mask.
  */
524 for (i = 0; i < cnt; i++) {
525 u64 target, nr;
526
527 nr = cpu_list[i];
528 if (nr == 0xffff)
529 continue;
530
531 target = (nr << 14) | 0x70;
532 if (is_jbus) {
533 busy_mask |= (0x1UL << (nr * 2));
534 } else {
 /* The slot number is encoded at bit 24 of the target
  * and selects the busy bit at position 2*slot.
  */
535 target |= (nack_busy_id << 24);
536 busy_mask |= (0x1UL <<
537 (nack_busy_id * 2));
538 }
539 __asm__ __volatile__(
540 "stxa %%g0, [%0] %1\n\t"
541 "membar #Sync\n\t"
542 :
543 : "r" (target), "i" (ASI_INTR_W));
544 nack_busy_id++;
545 if (nack_busy_id == 32) {
546 need_more = 1;
547 break;
548 }
549 }
550 }
551
552
 /* Poll the dispatch status for completion. */
553 {
554 u64 dispatch_stat, nack_mask;
555 long stuck;
556
 /* Poll budget scales with the number of dispatched targets.
  * Each target's nack bit sits directly above its busy bit.
  */
557 stuck = 100000 * nack_busy_id;
558 nack_mask = busy_mask << 1;
559 do {
560 __asm__ __volatile__("ldxa [%%g0] %1, %0"
561 : "=r" (dispatch_stat)
562 : "i" (ASI_INTR_DISPATCH_STAT));
563 if (!(dispatch_stat & (busy_mask | nack_mask))) {
 /* No busy and no nack bits left: this pass succeeded. */
564 __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
565 : : "r" (pstate));
566 if (unlikely(need_more)) {
 /* Retire the (up to) 32 entries just sent and go
  * around again for the rest of the list.
  */
567 int i, this_cnt = 0;
568 for (i = 0; i < cnt; i++) {
569 if (cpu_list[i] == 0xffff)
570 continue;
571 cpu_list[i] = 0xffff;
572 this_cnt++;
573 if (this_cnt == 32)
574 break;
575 }
576 goto retry;
577 }
578 return;
579 }
580 if (!--stuck)
581 break;
582 } while (dispatch_stat & busy_mask);
583
584 __asm__ __volatile__("wrpr %0, 0x0, %%pstate"
585 : : "r" (pstate));
586
587 if (dispatch_stat & busy_mask) {
588
589
590
 /* Busy bits never cleared within the poll budget: report
  * it and give up rather than spin forever.
  */
591 printk("CPU[%d]: mondo stuckage result[%016llx]\n",
592 smp_processor_id(), dispatch_stat);
593 } else {
594 int i, this_busy_nack = 0;
595
596
597
598
 /* Busy cleared but some nack bits remain.  Back off with
  * %pstate restored before resending.
  */
599 udelay(2 * nack_busy_id);
600
601
602
603
 /* Retire every entry whose nack bit is clear; the entries
  * left in the list are resent by the retry.
  */
604 for (i = 0; i < cnt; i++) {
605 u64 check_mask, nr;
606
607 nr = cpu_list[i];
608 if (nr == 0xffff)
609 continue;
610
611 if (is_jbus)
612 check_mask = (0x2UL << (2*nr));
613 else
614 check_mask = (0x2UL <<
615 this_busy_nack);
616 if ((dispatch_stat & check_mask) == 0)
617 cpu_list[i] = 0xffff;
618 this_busy_nack += 2;
619 if (this_busy_nack == 64)
620 break;
621 }
622
623 goto retry;
624 }
625 }
626}
627
628
/*
 * sun4v delivery backend: hand @tb's cpu list (first @cnt entries) and
 * mondo block to the hypervisor via sun4v_cpu_mondo_send() and retry
 * until it reports HV_EOK.
 *
 * Cpu list entries equal to 0xffff are counted as already sent.  The
 * call gives up with a KERN_CRIT message on an unexpected status or
 * after more than 10000 retries without forward progress.
 */
629static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
630{
631 int retries, this_cpu, prev_sent, i, saw_cpu_error;
632 unsigned long status;
633 u16 *cpu_list;
634
635 this_cpu = smp_processor_id();
636
637 cpu_list = __va(tb->cpu_list_pa);
638
639 saw_cpu_error = 0;
640 retries = 0;
641 prev_sent = 0;
642 do {
643 int forward_progress, n_sent;
644
645 status = sun4v_cpu_mondo_send(cnt,
646 tb->cpu_list_pa,
647 tb->cpu_mondo_block_pa);
648
649
 /* HV_EOK: nothing left to send. */
650 if (likely(status == HV_EOK))
651 break;
652
653
654
655
656
657
 /* Count the entries marked 0xffff (sent) and compare with the
  * previous attempt to detect forward progress.
  */
658 n_sent = 0;
659 for (i = 0; i < cnt; i++) {
660 if (likely(cpu_list[i] == 0xffff))
661 n_sent++;
662 }
663
664 forward_progress = 0;
665 if (n_sent > prev_sent)
666 forward_progress = 1;
667
668 prev_sent = n_sent;
669
670
671
672
673
 /* On HV_ECPUERROR, query each remaining target; targets in
  * error state are dropped from the list (marked 0xffff) and the
  * last one seen is remembered (as cpu + 1, so 0 means "none").
  * Any status other than HV_EWOULDBLOCK is fatal.
  */
674 if (unlikely(status == HV_ECPUERROR)) {
675 for (i = 0; i < cnt; i++) {
676 long err;
677 u16 cpu;
678
679 cpu = cpu_list[i];
680 if (cpu == 0xffff)
681 continue;
682
683 err = sun4v_cpu_state(cpu);
684 if (err == HV_CPU_STATE_ERROR) {
685 saw_cpu_error = (cpu + 1);
686 cpu_list[i] = 0xffff;
687 }
688 }
689 } else if (unlikely(status != HV_EWOULDBLOCK))
690 goto fatal_mondo_error;
691
692
693
694
695
696
697
698
 /* The retry counter only advances on attempts that made no
  * forward progress; such attempts also back off for 2us per
  * list entry before resending the (unmodified) list.
  */
699 if (unlikely(!forward_progress)) {
700 if (unlikely(++retries > 10000))
701 goto fatal_mondo_timeout;
702
703
704
705
706 udelay(2 * cnt);
707 }
708 } while (1);
709
710 if (unlikely(saw_cpu_error))
711 goto fatal_mondo_cpu_error;
712
713 return;
714
715fatal_mondo_cpu_error:
716 printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
717 "(including %d) were in error state\n",
718 this_cpu, saw_cpu_error - 1);
719 return;
720
721fatal_mondo_timeout:
722 printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
723 " progress after %d retries.\n",
724 this_cpu, retries);
725 goto dump_cpu_list_and_out;
726
727fatal_mondo_error:
728 printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
729 this_cpu, status);
730 printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
731 "mondo_block_pa(%lx)\n",
732 this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
733
 /* Both the timeout and the unexpected-error paths dump the list. */
734dump_cpu_list_and_out:
735 printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
736 for (i = 0; i < cnt; i++)
737 printk("%u ", cpu_list[i]);
738 printk("]\n");
739}
740
/* Delivery backend used by xcall_deliver(); smp_setup_processor_id()
 * points it at the spitfire, cheetah or hypervisor implementation
 * according to tlb_type.
 */
741static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);
742
743static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
744{
745 struct trap_per_cpu *tb;
746 int this_cpu, i, cnt;
747 unsigned long flags;
748 u16 *cpu_list;
749 u64 *mondo;
750
751
752
753
754
755
756
757
758
759
760
761 local_irq_save(flags);
762
763 this_cpu = smp_processor_id();
764 tb = &trap_block[this_cpu];
765
766 mondo = __va(tb->cpu_mondo_block_pa);
767 mondo[0] = data0;
768 mondo[1] = data1;
769 mondo[2] = data2;
770 wmb();
771
772 cpu_list = __va(tb->cpu_list_pa);
773
774
775 cnt = 0;
776 for_each_cpu(i, mask) {
777 if (i == this_cpu || !cpu_online(i))
778 continue;
779 cpu_list[cnt++] = i;
780 }
781
782 if (cnt)
783 xcall_deliver_impl(tb, cnt);
784
785 local_irq_restore(flags);
786}
787
788
789
790
791
792static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
793{
794 u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));
795
796 xcall_deliver(data0, data1, data2, mask);
797}
798
799
/* Send a cross call to all online cpus; smp_cross_call_masked() with
 * cpu_online_map as the mask (xcall_deliver() skips the calling cpu).
 */
800static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
801{
802 smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map);
803}
804
805extern unsigned long xcall_sync_tick;
806
/* Send the xcall_sync_tick cross call to @cpu only.  Called by
 * smp_synchronize_one_tick() to start the tick sync client there.
 */
807static void smp_start_sync_tick_client(int cpu)
808{
809 xcall_deliver((u64) &xcall_sync_tick, 0, 0,
810 &cpumask_of_cpu(cpu));
811}
812
813extern unsigned long xcall_call_function;
814
/* Send the xcall_call_function cross call to the cpus in @mask. */
815void arch_send_call_function_ipi_mask(const struct cpumask *mask)
816{
817 xcall_deliver((u64) &xcall_call_function, 0, 0, mask);
818}
819
820extern unsigned long xcall_call_function_single;
821
/* Send the xcall_call_function_single cross call to @cpu only. */
822void arch_send_call_function_single_ipi(int cpu)
823{
824 xcall_deliver((u64) &xcall_call_function_single, 0, 0,
825 &cpumask_of_cpu(cpu));
826}
827
/* Clear soft interrupt bit @irq, then run the generic
 * call-function interrupt handler.  @regs is unused.
 */
828void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs)
829{
830 clear_softint(1 << irq);
831 generic_smp_call_function_interrupt();
832}
833
/* Clear soft interrupt bit @irq, then run the generic single-target
 * call-function interrupt handler.  @regs is unused.
 */
834void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs)
835{
836 clear_softint(1 << irq);
837 generic_smp_call_function_single_interrupt();
838}
839
840static void tsb_sync(void *info)
841{
842 struct trap_per_cpu *tp = &trap_block[raw_smp_processor_id()];
843 struct mm_struct *mm = info;
844
845
846
847
848
849
850
851 if (tp->pgd_paddr == __pa(mm->pgd))
852 tsb_context_switch(mm);
853}
854
/* Run tsb_sync() for @mm on the cpus in mm_cpumask(@mm).  The final
 * argument (1) is the 'wait' flag of smp_call_function_many().
 */
855void smp_tsb_sync(struct mm_struct *mm)
856{
857 smp_call_function_many(mm_cpumask(mm), tsb_sync, mm, 1);
858}
859
860extern unsigned long xcall_flush_tlb_mm;
861extern unsigned long xcall_flush_tlb_pending;
862extern unsigned long xcall_flush_tlb_kernel_range;
863extern unsigned long xcall_fetch_glob_regs;
864extern unsigned long xcall_receive_signal;
865extern unsigned long xcall_new_mmu_context_version;
866#ifdef CONFIG_KGDB
867extern unsigned long xcall_kgdb_capture;
868#endif
869
870#ifdef DCACHE_ALIASING_POSSIBLE
871extern unsigned long xcall_flush_dcache_page_cheetah;
872#endif
873extern unsigned long xcall_flush_dcache_page_spitfire;
874
875#ifdef CONFIG_DEBUG_DCFLUSH
876extern atomic_t dcpage_flushes;
877extern atomic_t dcpage_flushes_xcall;
878#endif
879
880static inline void __local_flush_dcache_page(struct page *page)
881{
882#ifdef DCACHE_ALIASING_POSSIBLE
883 __flush_dcache_page(page_address(page),
884 ((tlb_type == spitfire) &&
885 page_mapping(page) != NULL));
886#else
887 if (page_mapping(page) != NULL &&
888 tlb_type == spitfire)
889 __flush_icache_page(__pa(page_address(page)));
890#endif
891}
892
893void smp_flush_dcache_page_impl(struct page *page, int cpu)
894{
895 int this_cpu;
896
897 if (tlb_type == hypervisor)
898 return;
899
900#ifdef CONFIG_DEBUG_DCFLUSH
901 atomic_inc(&dcpage_flushes);
902#endif
903
904 this_cpu = get_cpu();
905
906 if (cpu == this_cpu) {
907 __local_flush_dcache_page(page);
908 } else if (cpu_online(cpu)) {
909 void *pg_addr = page_address(page);
910 u64 data0 = 0;
911
912 if (tlb_type == spitfire) {
913 data0 = ((u64)&xcall_flush_dcache_page_spitfire);
914 if (page_mapping(page) != NULL)
915 data0 |= ((u64)1 << 32);
916 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
917#ifdef DCACHE_ALIASING_POSSIBLE
918 data0 = ((u64)&xcall_flush_dcache_page_cheetah);
919#endif
920 }
921 if (data0) {
922 xcall_deliver(data0, __pa(pg_addr),
923 (u64) pg_addr, &cpumask_of_cpu(cpu));
924#ifdef CONFIG_DEBUG_DCFLUSH
925 atomic_inc(&dcpage_flushes_xcall);
926#endif
927 }
928 }
929
930 put_cpu();
931}
932
933void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
934{
935 void *pg_addr;
936 int this_cpu;
937 u64 data0;
938
939 if (tlb_type == hypervisor)
940 return;
941
942 this_cpu = get_cpu();
943
944#ifdef CONFIG_DEBUG_DCFLUSH
945 atomic_inc(&dcpage_flushes);
946#endif
947 data0 = 0;
948 pg_addr = page_address(page);
949 if (tlb_type == spitfire) {
950 data0 = ((u64)&xcall_flush_dcache_page_spitfire);
951 if (page_mapping(page) != NULL)
952 data0 |= ((u64)1 << 32);
953 } else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
954#ifdef DCACHE_ALIASING_POSSIBLE
955 data0 = ((u64)&xcall_flush_dcache_page_cheetah);
956#endif
957 }
958 if (data0) {
959 xcall_deliver(data0, __pa(pg_addr),
960 (u64) pg_addr, &cpu_online_map);
961#ifdef CONFIG_DEBUG_DCFLUSH
962 atomic_inc(&dcpage_flushes_xcall);
963#endif
964 }
965 __local_flush_dcache_page(page);
966
967 put_cpu();
968}
969
970void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
971{
972 struct mm_struct *mm;
973 unsigned long flags;
974
975 clear_softint(1 << irq);
976
977
978
979
980 mm = current->active_mm;
981 if (unlikely(!mm || (mm == &init_mm)))
982 return;
983
984 spin_lock_irqsave(&mm->context.lock, flags);
985
986 if (unlikely(!CTX_VALID(mm->context)))
987 get_new_mmu_context(mm);
988
989 spin_unlock_irqrestore(&mm->context.lock, flags);
990
991 load_secondary_context(mm);
992 __flush_tlb_mm(CTX_HWBITS(mm->context),
993 SECONDARY_CONTEXT);
994}
995
/* Send the xcall_new_mmu_context_version cross call to all other
 * online cpus.
 */
996void smp_new_mmu_context_version(void)
997{
998 smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
999}
1000
1001#ifdef CONFIG_KGDB
/* Send the xcall_kgdb_capture cross call to all other online cpus.
 * @flags is unused.
 */
1002void kgdb_roundup_cpus(unsigned long flags)
1003{
1004 smp_cross_call(&xcall_kgdb_capture, 0, 0, 0);
1005}
1006#endif
1007
/* Send the xcall_fetch_glob_regs cross call to all other online cpus. */
1008void smp_fetch_global_regs(void)
1009{
1010 smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0);
1011}
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056void smp_flush_tlb_mm(struct mm_struct *mm)
1057{
1058 u32 ctx = CTX_HWBITS(mm->context);
1059 int cpu = get_cpu();
1060
1061 if (atomic_read(&mm->mm_users) == 1) {
1062 cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
1063 goto local_flush_and_out;
1064 }
1065
1066 smp_cross_call_masked(&xcall_flush_tlb_mm,
1067 ctx, 0, 0,
1068 mm_cpumask(mm));
1069
1070local_flush_and_out:
1071 __flush_tlb_mm(ctx, SECONDARY_CONTEXT);
1072
1073 put_cpu();
1074}
1075
1076void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
1077{
1078 u32 ctx = CTX_HWBITS(mm->context);
1079 int cpu = get_cpu();
1080
1081 if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
1082 cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
1083 else
1084 smp_cross_call_masked(&xcall_flush_tlb_pending,
1085 ctx, nr, (unsigned long) vaddrs,
1086 mm_cpumask(mm));
1087
1088 __flush_tlb_pending(ctx, nr, vaddrs);
1089
1090 put_cpu();
1091}
1092
1093void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
1094{
1095 start &= PAGE_MASK;
1096 end = PAGE_ALIGN(end);
1097 if (start != end) {
1098 smp_cross_call(&xcall_flush_tlb_kernel_range,
1099 0, start, end);
1100
1101 __flush_tlb_kernel_range(start, end);
1102 }
1103}
1104
1105
1106
1107extern unsigned long xcall_capture;
1108
/* Nesting depth of smp_capture()/smp_release() calls. */
1109static atomic_t smp_capture_depth = ATOMIC_INIT(0);
/* Number of cpus currently checked in: the capturing cpu plus every
 * cpu spinning in smp_penguin_jailcell().
 */
1110static atomic_t smp_capture_registry = ATOMIC_INIT(0);
/* Non-zero while captured cpus must keep spinning in
 * smp_penguin_jailcell(); set by smp_capture(), cleared by
 * smp_release().
 */
1111static unsigned long penguins_are_doing_time;
1112
1113void smp_capture(void)
1114{
1115 int result = atomic_add_ret(1, &smp_capture_depth);
1116
1117 if (result == 1) {
1118 int ncpus = num_online_cpus();
1119
1120#ifdef CAPTURE_DEBUG
1121 printk("CPU[%d]: Sending penguins to jail...",
1122 smp_processor_id());
1123#endif
1124 penguins_are_doing_time = 1;
1125 atomic_inc(&smp_capture_registry);
1126 smp_cross_call(&xcall_capture, 0, 0, 0);
1127 while (atomic_read(&smp_capture_registry) != ncpus)
1128 rmb();
1129#ifdef CAPTURE_DEBUG
1130 printk("done\n");
1131#endif
1132 }
1133}
1134
1135void smp_release(void)
1136{
1137 if (atomic_dec_and_test(&smp_capture_depth)) {
1138#ifdef CAPTURE_DEBUG
1139 printk("CPU[%d]: Giving pardon to "
1140 "imprisoned penguins\n",
1141 smp_processor_id());
1142#endif
1143 penguins_are_doing_time = 0;
1144 membar_safe("#StoreLoad");
1145 atomic_dec(&smp_capture_registry);
1146 }
1147}
1148
1149
1150
1151
1152extern void prom_world(int);
1153
1154void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs)
1155{
1156 clear_softint(1 << irq);
1157
1158 preempt_disable();
1159
1160 __asm__ __volatile__("flushw");
1161 prom_world(1);
1162 atomic_inc(&smp_capture_registry);
1163 membar_safe("#StoreLoad");
1164 while (penguins_are_doing_time)
1165 rmb();
1166 atomic_dec(&smp_capture_registry);
1167 prom_world(0);
1168
1169 preempt_enable();
1170}
1171
1172
/* Changing the profiling timer multiplier is not supported here:
 * always returns -EINVAL and ignores @multiplier.
 */
1173int setup_profiling_timer(unsigned int multiplier)
1174{
1175 return -EINVAL;
1176}
1177
/* Intentionally empty: nothing is done here and @max_cpus is ignored. */
1178void __init smp_prepare_cpus(unsigned int max_cpus)
1179{
1180}
1181
/* Intentionally empty: no boot cpu preparation is done here. */
1182void __devinit smp_prepare_boot_cpu(void)
1183{
1184}
1185
1186void __init smp_setup_processor_id(void)
1187{
1188 if (tlb_type == spitfire)
1189 xcall_deliver_impl = spitfire_xcall_deliver;
1190 else if (tlb_type == cheetah || tlb_type == cheetah_plus)
1191 xcall_deliver_impl = cheetah_xcall_deliver;
1192 else
1193 xcall_deliver_impl = hypervisor_xcall_deliver;
1194}
1195
1196void __devinit smp_fill_in_sib_core_maps(void)
1197{
1198 unsigned int i;
1199
1200 for_each_present_cpu(i) {
1201 unsigned int j;
1202
1203 cpus_clear(cpu_core_map[i]);
1204 if (cpu_data(i).core_id == 0) {
1205 cpu_set(i, cpu_core_map[i]);
1206 continue;
1207 }
1208
1209 for_each_present_cpu(j) {
1210 if (cpu_data(i).core_id ==
1211 cpu_data(j).core_id)
1212 cpu_set(j, cpu_core_map[i]);
1213 }
1214 }
1215
1216 for_each_present_cpu(i) {
1217 unsigned int j;
1218
1219 cpus_clear(per_cpu(cpu_sibling_map, i));
1220 if (cpu_data(i).proc_id == -1) {
1221 cpu_set(i, per_cpu(cpu_sibling_map, i));
1222 continue;
1223 }
1224
1225 for_each_present_cpu(j) {
1226 if (cpu_data(i).proc_id ==
1227 cpu_data(j).proc_id)
1228 cpu_set(j, per_cpu(cpu_sibling_map, i));
1229 }
1230 }
1231}
1232
1233int __cpuinit __cpu_up(unsigned int cpu)
1234{
1235 int ret = smp_boot_one_cpu(cpu);
1236
1237 if (!ret) {
1238 cpu_set(cpu, smp_commenced_mask);
1239 while (!cpu_isset(cpu, cpu_online_map))
1240 mb();
1241 if (!cpu_isset(cpu, cpu_online_map)) {
1242 ret = -ENODEV;
1243 } else {
1244
1245
1246
1247 if (tlb_type != hypervisor)
1248 smp_synchronize_one_tick(cpu);
1249 }
1250 }
1251 return ret;
1252}
1253
1254#ifdef CONFIG_HOTPLUG_CPU
1255void cpu_play_dead(void)
1256{
1257 int cpu = smp_processor_id();
1258 unsigned long pstate;
1259
1260 idle_task_exit();
1261
1262 if (tlb_type == hypervisor) {
1263 struct trap_per_cpu *tb = &trap_block[cpu];
1264
1265 sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO,
1266 tb->cpu_mondo_pa, 0);
1267 sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO,
1268 tb->dev_mondo_pa, 0);
1269 sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR,
1270 tb->resum_mondo_pa, 0);
1271 sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR,
1272 tb->nonresum_mondo_pa, 0);
1273 }
1274
1275 cpu_clear(cpu, smp_commenced_mask);
1276 membar_safe("#Sync");
1277
1278 local_irq_disable();
1279
1280 __asm__ __volatile__(
1281 "rdpr %%pstate, %0\n\t"
1282 "wrpr %0, %1, %%pstate"
1283 : "=r" (pstate)
1284 : "i" (PSTATE_IE));
1285
1286 while (1)
1287 barrier();
1288}
1289
1290int __cpu_disable(void)
1291{
1292 int cpu = smp_processor_id();
1293 cpuinfo_sparc *c;
1294 int i;
1295
1296 for_each_cpu_mask(i, cpu_core_map[cpu])
1297 cpu_clear(cpu, cpu_core_map[i]);
1298 cpus_clear(cpu_core_map[cpu]);
1299
1300 for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
1301 cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
1302 cpus_clear(per_cpu(cpu_sibling_map, cpu));
1303
1304 c = &cpu_data(cpu);
1305
1306 c->core_id = 0;
1307 c->proc_id = -1;
1308
1309 smp_wmb();
1310
1311
1312 fixup_irqs();
1313
1314 local_irq_enable();
1315 mdelay(1);
1316 local_irq_disable();
1317
1318 ipi_call_lock();
1319 cpu_clear(cpu, cpu_online_map);
1320 ipi_call_unlock();
1321
1322 cpu_map_rebuild();
1323
1324 return 0;
1325}
1326
1327void __cpu_die(unsigned int cpu)
1328{
1329 int i;
1330
1331 for (i = 0; i < 100; i++) {
1332 smp_rmb();
1333 if (!cpu_isset(cpu, smp_commenced_mask))
1334 break;
1335 msleep(100);
1336 }
1337 if (cpu_isset(cpu, smp_commenced_mask)) {
1338 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1339 } else {
1340#if defined(CONFIG_SUN_LDOMS)
1341 unsigned long hv_err;
1342 int limit = 100;
1343
1344 do {
1345 hv_err = sun4v_cpu_stop(cpu);
1346 if (hv_err == HV_EOK) {
1347 cpu_clear(cpu, cpu_present_map);
1348 break;
1349 }
1350 } while (--limit > 0);
1351 if (limit <= 0) {
1352 printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n",
1353 hv_err);
1354 }
1355#endif
1356 }
1357}
1358#endif
1359
/* Calls pcr_arch_init(); @max_cpus is ignored. */
1360void __init smp_cpus_done(unsigned int max_cpus)
1361{
1362 pcr_arch_init();
1363}
1364
/* Send the xcall_receive_signal cross call to @cpu only. */
1365void smp_send_reschedule(int cpu)
1366{
1367 xcall_deliver((u64) &xcall_receive_signal, 0, 0,
1368 &cpumask_of_cpu(cpu));
1369}
1370
/* Only clears soft interrupt bit @irq; no other work is done here.
 * @regs is unused.
 */
1371void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
1372{
1373 clear_softint(1 << irq);
1374}
1375
1376
1377
1378
/* Intentionally empty: no stop request is sent to other cpus. */
1379void smp_send_stop(void)
1380{
1381}
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
1397 size_t align)
1398{
1399 const unsigned long goal = __pa(MAX_DMA_ADDRESS);
1400#ifdef CONFIG_NEED_MULTIPLE_NODES
1401 int node = cpu_to_node(cpu);
1402 void *ptr;
1403
1404 if (!node_online(node) || !NODE_DATA(node)) {
1405 ptr = __alloc_bootmem(size, align, goal);
1406 pr_info("cpu %d has no node %d or node-local memory\n",
1407 cpu, node);
1408 pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
1409 cpu, size, __pa(ptr));
1410 } else {
1411 ptr = __alloc_bootmem_node(NODE_DATA(node),
1412 size, align, goal);
1413 pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
1414 "%016lx\n", cpu, size, node, __pa(ptr));
1415 }
1416 return ptr;
1417#else
1418 return __alloc_bootmem(size, align, goal);
1419#endif
1420}
1421
/* Free callback paired with pcpu_alloc_bootmem(): returns @size bytes
 * at @ptr to bootmem (free_bootmem() is given the physical address).
 */
1422static void __init pcpu_free_bootmem(void *ptr, size_t size)
1423{
1424 free_bootmem(__pa(ptr), size);
1425}
1426
1427static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
1428{
1429 if (cpu_to_node(from) == cpu_to_node(to))
1430 return LOCAL_DISTANCE;
1431 else
1432 return REMOTE_DISTANCE;
1433}
1434
1435static void __init pcpu_populate_pte(unsigned long addr)
1436{
1437 pgd_t *pgd = pgd_offset_k(addr);
1438 pud_t *pud;
1439 pmd_t *pmd;
1440
1441 pud = pud_offset(pgd, addr);
1442 if (pud_none(*pud)) {
1443 pmd_t *new;
1444
1445 new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
1446 pud_populate(&init_mm, pud, new);
1447 }
1448
1449 pmd = pmd_offset(pud, addr);
1450 if (!pmd_present(*pmd)) {
1451 pte_t *new;
1452
1453 new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
1454 pmd_populate_kernel(&init_mm, pmd, new);
1455 }
1456}
1457
1458void __init setup_per_cpu_areas(void)
1459{
1460 unsigned long delta;
1461 unsigned int cpu;
1462 int rc = -EINVAL;
1463
1464 if (pcpu_chosen_fc != PCPU_FC_PAGE) {
1465 rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
1466 PERCPU_DYNAMIC_RESERVE, 4 << 20,
1467 pcpu_cpu_distance,
1468 pcpu_alloc_bootmem,
1469 pcpu_free_bootmem);
1470 if (rc)
1471 pr_warning("PERCPU: %s allocator failed (%d), "
1472 "falling back to page size\n",
1473 pcpu_fc_names[pcpu_chosen_fc], rc);
1474 }
1475 if (rc < 0)
1476 rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
1477 pcpu_alloc_bootmem,
1478 pcpu_free_bootmem,
1479 pcpu_populate_pte);
1480 if (rc < 0)
1481 panic("cannot initialize percpu area (err=%d)", rc);
1482
1483 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
1484 for_each_possible_cpu(cpu)
1485 __per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
1486
1487
1488 __local_per_cpu_offset = __per_cpu_offset(smp_processor_id());
1489
1490 of_fill_in_cpu_data();
1491 if (tlb_type == hypervisor)
1492 mdesc_fill_in_cpu_data(cpu_all_mask);
1493}
1494