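/* Sparc64 SMP support. */
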
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/profile.h>
#include <linux/bootmem.h>
#include <linux/vmalloc.h>
#include <linux/cpu.h>

#include <asm/head.h>
#include <asm/ptrace.h>
#include <asm/atomic.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cpudata.h>
#include <asm/hvtramp.h>
#include <asm/io.h>
#include <asm/timer.h>

#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/uaccess.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
#include <asm/sections.h>
#include <asm/prom.h>
#include <asm/mdesc.h>
#include <asm/ldc.h>
#include <asm/hypervisor.h>

#include "cpumap.h"

int sparc64_multi_core __read_mostly;

DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
	{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };

EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_SYMBOL(cpu_core_map);

static cpumask_t smp_commenced_mask;

void smp_info(struct seq_file *m)
{
	int i;

	seq_printf(m, "State:\n");
	for_each_online_cpu(i)
		seq_printf(m, "CPU%d:\t\tonline\n", i);
}

void smp_bogo(struct seq_file *m)
{
	int i;

	for_each_online_cpu(i)
		seq_printf(m,
			   "Cpu%dClkTck\t: %016lx\n",
			   i, cpu_data(i).clock_tick);
}

extern void setup_sparc64_timer(void);

static volatile unsigned long callin_flag = 0;

void __cpuinit smp_callin(void)
{
	int cpuid = hard_smp_processor_id();

	__local_per_cpu_offset = __per_cpu_offset(cpuid);

	if (tlb_type == hypervisor)
		sun4v_ktsb_register();

	__flush_tlb_all();

	setup_sparc64_timer();

	if (cheetah_pcache_forced_on)
		cheetah_enable_pcache();

	local_irq_enable();

	callin_flag = 1;
	__asm__ __volatile__("membar #Sync\n\t"
			     "flush %%g6" : : : "memory");

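	/* This cpu is now fully up; clear the new_child flag that was
	 * set when this idle thread was forked.
	 */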
	current_thread_info()->new_child = 0;

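	/* Attach to the address space of init_task. */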
	atomic_inc(&init_mm.mm_count);
	current->active_mm = &init_mm;

	notify_cpu_starting(cpuid);

	while (!cpu_isset(cpuid, smp_commenced_mask))
		rmb();

	ipi_call_lock_irq();
	cpu_set(cpuid, cpu_online_map);
	ipi_call_unlock_irq();

	preempt_disable();
}

void cpu_panic(void)
{
	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
	panic("SMP bolixed\n");
}

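/* Tick register synchronization between the boot master and a freshly
 * started cpu.  The slave samples its %tick around a go[] handshake with
 * the master and applies the measured offset; the scheme mirrors the
 * ia64 ITC synchronization code.
 */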
#define MASTER	0
#define SLAVE	(SMP_CACHE_BYTES/sizeof(unsigned long))

#define NUM_ROUNDS	64
#define NUM_ITERS	5

static DEFINE_SPINLOCK(itc_sync_lock);
static unsigned long go[SLAVE + 1];

#define DEBUG_TICK_SYNC	0

static inline long get_delta(long *rt, long *master)
{
	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
	unsigned long tcenter, t0, t1, tm;
	unsigned long i;

	for (i = 0; i < NUM_ITERS; i++) {
		t0 = tick_ops->get_tick();
		go[MASTER] = 1;
		membar_safe("#StoreLoad");
		while (!(tm = go[SLAVE]))
			rmb();
		go[SLAVE] = 0;
		wmb();
		t1 = tick_ops->get_tick();

		if (t1 - t0 < best_t1 - best_t0)
			best_t0 = t0, best_t1 = t1, best_tm = tm;
	}

	*rt = best_t1 - best_t0;
	*master = best_tm - best_t0;

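	/* average best_t0 and best_t1 without overflow: */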
	tcenter = (best_t0/2 + best_t1/2);
	if (best_t0 % 2 + best_t1 % 2 == 2)
		tcenter++;
	return tcenter - best_tm;
}

void smp_synchronize_tick_client(void)
{
	long i, delta, adj, adjust_latency = 0, done = 0;
	unsigned long flags, rt, master_time_stamp, bound;
#if DEBUG_TICK_SYNC
	struct {
		long rt;
		long master;
		long diff;
		long lat;
	} t[NUM_ROUNDS];
#endif

	go[MASTER] = 1;

	while (go[MASTER])
		rmb();

	local_irq_save(flags);
	{
		for (i = 0; i < NUM_ROUNDS; i++) {
			delta = get_delta(&rt, &master_time_stamp);
			if (delta == 0) {
				done = 1;
				bound = rt;
			}

			if (!done) {
				if (i > 0) {
					adjust_latency += -delta;
					adj = -delta + adjust_latency/4;
				} else
					adj = -delta;

				tick_ops->add_tick(adj);
			}
#if DEBUG_TICK_SYNC
			t[i].rt = rt;
			t[i].master = master_time_stamp;
			t[i].diff = delta;
			t[i].lat = adjust_latency/4;
#endif
		}
	}
	local_irq_restore(flags);

#if DEBUG_TICK_SYNC
	for (i = 0; i < NUM_ROUNDS; i++)
		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif

	printk(KERN_INFO "CPU %d: synchronized TICK with master CPU "
	       "(last diff %ld cycles, maxerr %lu cycles)\n",
	       smp_processor_id(), delta, rt);
}

static void smp_start_sync_tick_client(int cpu);

static void smp_synchronize_one_tick(int cpu)
{
	unsigned long flags, i;

	go[MASTER] = 0;

	smp_start_sync_tick_client(cpu);

	while (!go[MASTER])
		rmb();

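	/* now let the client proceed into his loop */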
	go[MASTER] = 0;
	membar_safe("#StoreLoad");

	spin_lock_irqsave(&itc_sync_lock, flags);
	{
		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
			while (!go[MASTER])
				rmb();
			go[MASTER] = 0;
			wmb();
			go[SLAVE] = tick_ops->get_tick();
			membar_safe("#StoreLoad");
		}
	}
	spin_unlock_irqrestore(&itc_sync_lock, flags);
}

#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)

static unsigned long kimage_addr_to_ra(void *p)
{
	unsigned long val = (unsigned long) p;

	return kern_base + (val - KERNBASE);
}

static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg, void **descrp)
{
	extern unsigned long sparc64_ttable_tl0;
	extern unsigned long kern_locked_tte_data;
	struct hvtramp_descr *hdesc;
	unsigned long trampoline_ra;
	struct trap_per_cpu *tb;
	u64 tte_vaddr, tte_data;
	unsigned long hv_err;
	int i;

	hdesc = kzalloc(sizeof(*hdesc) +
			(sizeof(struct hvtramp_mapping) *
			 num_kernel_image_mappings - 1),
			GFP_KERNEL);
	if (!hdesc) {
		printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
		       "hvtramp_descr.\n");
		return;
	}
	*descrp = hdesc;

	hdesc->cpu = cpu;
	hdesc->num_mappings = num_kernel_image_mappings;

	tb = &trap_block[cpu];

	hdesc->fault_info_va = (unsigned long) &tb->fault_info;
	hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);

	hdesc->thread_reg = thread_reg;

	tte_vaddr = (unsigned long) KERNBASE;
	tte_data = kern_locked_tte_data;

	for (i = 0; i < hdesc->num_mappings; i++) {
		hdesc->maps[i].vaddr = tte_vaddr;
		hdesc->maps[i].tte = tte_data;
		tte_vaddr += 0x400000;
		tte_data += 0x400000;
	}

	trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);

	hv_err = sun4v_cpu_start(cpu, trampoline_ra,
				 kimage_addr_to_ra(&sparc64_ttable_tl0),
				 __pa(hdesc));
	if (hv_err)
		printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
		       "gives error %lu\n", hv_err);
}
#endif

extern unsigned long sparc64_cpu_startup;

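/* The cookie passed down to the firmware / hypervisor trampoline is the
 * address of this variable; the freshly started cpu loads its thread
 * pointer through it.
 */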
static struct thread_info *cpu_new_thread = NULL;

static int __cpuinit smp_boot_one_cpu(unsigned int cpu)
{
	unsigned long entry =
		(unsigned long)(&sparc64_cpu_startup);
	unsigned long cookie =
		(unsigned long)(&cpu_new_thread);
	struct task_struct *p;
	void *descr = NULL;
	int timeout, ret;

	p = fork_idle(cpu);
	if (IS_ERR(p))
		return PTR_ERR(p);
	callin_flag = 0;
	cpu_new_thread = task_thread_info(p);

	if (tlb_type == hypervisor) {
#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
		if (ldom_domaining_enabled)
			ldom_startcpu_cpuid(cpu,
					    (unsigned long) cpu_new_thread,
					    &descr);
		else
#endif
			prom_startcpu_cpuid(cpu, entry, cookie);
	} else {
		struct device_node *dp = of_find_node_by_cpuid(cpu);

		prom_startcpu(dp->node, entry, cookie);
	}

	for (timeout = 0; timeout < 50000; timeout++) {
		if (callin_flag)
			break;
		udelay(100);
	}

	if (callin_flag) {
		ret = 0;
	} else {
		printk("Processor %d is stuck.\n", cpu);
		ret = -ENODEV;
	}
	cpu_new_thread = NULL;

	kfree(descr);

	return ret;
}

static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
{
	u64 result, target;
	int stuck, tmp;

	if (this_is_starfire) {
		cpu = (((cpu & 0x3c) << 1) |
			((cpu & 0x40) >> 4) |
			(cpu & 0x3));
	}

	target = (cpu << 14) | 0x70;
again:
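	/* Write the three interrupt dispatch data words to ASI_INTR_W at
	 * offsets 0x40, 0x50 and 0x60, trigger the dispatch to the target
	 * cpu, then poll the dispatch status register below.
	 */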
	tmp = 0x40;
	__asm__ __volatile__(
	"wrpr %1, %2, %%pstate\n\t"
	"stxa %4, [%0] %3\n\t"
	"stxa %5, [%0+%8] %3\n\t"
	"add %0, %8, %0\n\t"
	"stxa %6, [%0+%8] %3\n\t"
	"membar #Sync\n\t"
	"stxa %%g0, [%7] %3\n\t"
	"membar #Sync\n\t"
	"mov 0x20, %%g1\n\t"
	"ldxa [%%g1] 0x7f, %%g0\n\t"
	"membar #Sync"
	: "=r" (tmp)
	: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
	  "r" (data0), "r" (data1), "r" (data2), "r" (target),
	  "r" (0x10), "0" (tmp)
	: "g1");

	stuck = 100000;
	do {
		__asm__ __volatile__("ldxa [%%g0] %1, %0"
				     : "=r" (result)
				     : "i" (ASI_INTR_DISPATCH_STAT));
		if (result == 0) {
			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
					     : : "r" (pstate));
			return;
		}
		stuck -= 1;
		if (stuck == 0)
			break;
	} while (result & 0x1);
	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
			     : : "r" (pstate));
	if (stuck == 0) {
		printk("CPU[%d]: mondo stuckage result[%016llx]\n",
		       smp_processor_id(), result);
	} else {
		udelay(2);
		goto again;
	}
}

static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	u64 *mondo, data0, data1, data2;
	u16 *cpu_list;
	u64 pstate;
	int i;

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
	cpu_list = __va(tb->cpu_list_pa);
	mondo = __va(tb->cpu_mondo_block_pa);
	data0 = mondo[0];
	data1 = mondo[1];
	data2 = mondo[2];
	for (i = 0; i < cnt; i++)
		spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
}

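/* Cheetah-class cpus: dispatch the mondo to up to 32 targets at once and
 * retry any that were NACK'd or left busy.
 */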
static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	int nack_busy_id, is_jbus, need_more;
	u64 *mondo, pstate, ver, busy_mask;
	u16 *cpu_list;

	cpu_list = __va(tb->cpu_list_pa);
	mondo = __va(tb->cpu_mondo_block_pa);

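	/* JBUS parts (Jalapeno and Serrano) index the busy/nack status bits
	 * by target cpu number instead of by dispatch slot, so detect them
	 * from the %ver register.
	 */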
	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
	is_jbus = ((ver >> 32) == __JALAPENO_ID ||
		   (ver >> 32) == __SERRANO_ID);

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

retry:
	need_more = 0;
	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
			     : : "r" (pstate), "i" (PSTATE_IE));

	__asm__ __volatile__("stxa %0, [%3] %6\n\t"
			     "stxa %1, [%4] %6\n\t"
			     "stxa %2, [%5] %6\n\t"
			     "membar #Sync\n\t"
			     :
			     : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
			       "r" (0x40), "r" (0x50), "r" (0x60),
			       "i" (ASI_INTR_W));

	nack_busy_id = 0;
	busy_mask = 0;
	{
		int i;

		for (i = 0; i < cnt; i++) {
			u64 target, nr;

			nr = cpu_list[i];
			if (nr == 0xffff)
				continue;

			target = (nr << 14) | 0x70;
			if (is_jbus) {
				busy_mask |= (0x1UL << (nr * 2));
			} else {
				target |= (nack_busy_id << 24);
				busy_mask |= (0x1UL <<
					      (nack_busy_id * 2));
			}
			__asm__ __volatile__(
				"stxa %%g0, [%0] %1\n\t"
				"membar #Sync\n\t"
				:
				: "r" (target), "i" (ASI_INTR_W));
			nack_busy_id++;
			if (nack_busy_id == 32) {
				need_more = 1;
				break;
			}
		}
	}

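	/* Now, poll for completion. */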
	{
		u64 dispatch_stat, nack_mask;
		long stuck;

		stuck = 100000 * nack_busy_id;
		nack_mask = busy_mask << 1;
		do {
			__asm__ __volatile__("ldxa [%%g0] %1, %0"
					     : "=r" (dispatch_stat)
					     : "i" (ASI_INTR_DISPATCH_STAT));
			if (!(dispatch_stat & (busy_mask | nack_mask))) {
				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
						     : : "r" (pstate));
				if (unlikely(need_more)) {
					int i, this_cnt = 0;
					for (i = 0; i < cnt; i++) {
						if (cpu_list[i] == 0xffff)
							continue;
						cpu_list[i] = 0xffff;
						this_cnt++;
						if (this_cnt == 32)
							break;
					}
					goto retry;
				}
				return;
			}
			if (!--stuck)
				break;
		} while (dispatch_stat & busy_mask);

		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
				     : : "r" (pstate));

		if (dispatch_stat & busy_mask) {
			printk("CPU[%d]: mondo stuckage result[%016llx]\n",
			       smp_processor_id(), dispatch_stat);
		} else {
			int i, this_busy_nack = 0;

			udelay(2 * nack_busy_id);

			for (i = 0; i < cnt; i++) {
				u64 check_mask, nr;

				nr = cpu_list[i];
				if (nr == 0xffff)
					continue;

				if (is_jbus)
					check_mask = (0x2UL << (2*nr));
				else
					check_mask = (0x2UL <<
						      this_busy_nack);
				if ((dispatch_stat & check_mask) == 0)
					cpu_list[i] = 0xffff;
				this_busy_nack += 2;
				if (this_busy_nack == 64)
					break;
			}

			goto retry;
		}
	}
}

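/* Multi-cpu list version, used on sun4v hypervisor platforms. */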
static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	int retries, this_cpu, prev_sent, i, saw_cpu_error;
	unsigned long status;
	u16 *cpu_list;

	this_cpu = smp_processor_id();

	cpu_list = __va(tb->cpu_list_pa);

	saw_cpu_error = 0;
	retries = 0;
	prev_sent = 0;
	do {
		int forward_progress, n_sent;

		status = sun4v_cpu_mondo_send(cnt,
					      tb->cpu_list_pa,
					      tb->cpu_mondo_block_pa);

		if (likely(status == HV_EOK))
			break;

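		/* First, see how much forward progress we made.  The
		 * hypervisor indicates successful delivery by setting the
		 * corresponding cpu list entries to the value 0xffff.
		 */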
		n_sent = 0;
		for (i = 0; i < cnt; i++) {
			if (likely(cpu_list[i] == 0xffff))
				n_sent++;
		}

		forward_progress = 0;
		if (n_sent > prev_sent)
			forward_progress = 1;

		prev_sent = n_sent;

		if (unlikely(status == HV_ECPUERROR)) {
			for (i = 0; i < cnt; i++) {
				long err;
				u16 cpu;

				cpu = cpu_list[i];
				if (cpu == 0xffff)
					continue;

				err = sun4v_cpu_state(cpu);
				if (err == HV_CPU_STATE_ERROR) {
					saw_cpu_error = (cpu + 1);
					cpu_list[i] = 0xffff;
				}
			}
		} else if (unlikely(status != HV_EWOULDBLOCK))
			goto fatal_mondo_error;

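		/* Retry without rewriting the cpu list; entries already set
		 * to 0xffff are skipped by the hypervisor.  Only advance the
		 * timeout counter if we made no forward progress.
		 */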
		if (unlikely(!forward_progress)) {
			if (unlikely(++retries > 10000))
				goto fatal_mondo_timeout;

			udelay(2 * cnt);
		}
	} while (1);

	if (unlikely(saw_cpu_error))
		goto fatal_mondo_cpu_error;

	return;

fatal_mondo_cpu_error:
	printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
	       "(including %d) were in error state\n",
	       this_cpu, saw_cpu_error - 1);
	return;

fatal_mondo_timeout:
	printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
	       " progress after %d retries.\n",
	       this_cpu, retries);
	goto dump_cpu_list_and_out;

fatal_mondo_error:
	printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
	       this_cpu, status);
	printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
	       "mondo_block_pa(%lx)\n",
	       this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);

dump_cpu_list_and_out:
	printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
	for (i = 0; i < cnt; i++)
		printk("%u ", cpu_list[i]);
	printk("]\n");
}

static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);

static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
{
	struct trap_per_cpu *tb;
	int this_cpu, i, cnt;
	unsigned long flags;
	u16 *cpu_list;
	u64 *mondo;

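	/* The per-cpu mondo block and cpu list are used below, so the whole
	 * operation must run with interrupts disabled; otherwise an xcall
	 * sent from interrupt context could corrupt them.  As a consequence
	 * the delivery implementations cannot rely on jiffies advancing and
	 * use udelay() for their timeouts instead.
	 */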
	local_irq_save(flags);

	this_cpu = smp_processor_id();
	tb = &trap_block[this_cpu];

	mondo = __va(tb->cpu_mondo_block_pa);
	mondo[0] = data0;
	mondo[1] = data1;
	mondo[2] = data2;
	wmb();

	cpu_list = __va(tb->cpu_list_pa);

	cnt = 0;
	for_each_cpu(i, mask) {
		if (i == this_cpu || !cpu_online(i))
			continue;
		cpu_list[cnt++] = i;
	}

	if (cnt)
		xcall_deliver_impl(tb, cnt);

	local_irq_restore(flags);
}

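/* Send a cross call to all processors mentioned in MASK, except self. */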
static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
{
	u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));

	xcall_deliver(data0, data1, data2, mask);
}

static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
{
	smp_cross_call_masked(func, ctx, data1, data2, &cpu_online_map);
}

extern unsigned long xcall_sync_tick;

static void smp_start_sync_tick_client(int cpu)
{
	xcall_deliver((u64) &xcall_sync_tick, 0, 0,
		      &cpumask_of_cpu(cpu));
}

extern unsigned long xcall_call_function;

void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	xcall_deliver((u64) &xcall_call_function, 0, 0, mask);
}

extern unsigned long xcall_call_function_single;

void arch_send_call_function_single_ipi(int cpu)
{
	xcall_deliver((u64) &xcall_call_function_single, 0, 0,
		      &cpumask_of_cpu(cpu));
}

void smp_call_function_client(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);
	generic_smp_call_function_interrupt();
}

void smp_call_function_single_client(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);
	generic_smp_call_function_single_interrupt();
}

static void tsb_sync(void *info)
{
	struct trap_per_cpu *tp = &trap_block[raw_smp_processor_id()];
	struct mm_struct *mm = info;

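	/* Testing "current->active_mm == mm" is not reliable here, since
	 * "current" is not updated atomically with switch_mm().  Checking
	 * the trap block's PGD physical address tells us whether this cpu
	 * is actually using mm right now.
	 */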
	if (tp->pgd_paddr == __pa(mm->pgd))
		tsb_context_switch(mm);
}

void smp_tsb_sync(struct mm_struct *mm)
{
	smp_call_function_many(mm_cpumask(mm), tsb_sync, mm, 1);
}

extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_pending;
extern unsigned long xcall_flush_tlb_kernel_range;
extern unsigned long xcall_fetch_glob_regs;
extern unsigned long xcall_receive_signal;
extern unsigned long xcall_new_mmu_context_version;
#ifdef CONFIG_KGDB
extern unsigned long xcall_kgdb_capture;
#endif

#ifdef DCACHE_ALIASING_POSSIBLE
extern unsigned long xcall_flush_dcache_page_cheetah;
#endif
extern unsigned long xcall_flush_dcache_page_spitfire;

#ifdef CONFIG_DEBUG_DCFLUSH
extern atomic_t dcpage_flushes;
extern atomic_t dcpage_flushes_xcall;
#endif

static inline void __local_flush_dcache_page(struct page *page)
{
#ifdef DCACHE_ALIASING_POSSIBLE
	__flush_dcache_page(page_address(page),
			    ((tlb_type == spitfire) &&
			     page_mapping(page) != NULL));
#else
	if (page_mapping(page) != NULL &&
	    tlb_type == spitfire)
		__flush_icache_page(__pa(page_address(page)));
#endif
}

void smp_flush_dcache_page_impl(struct page *page, int cpu)
{
	int this_cpu;

	if (tlb_type == hypervisor)
		return;

#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif

	this_cpu = get_cpu();

	if (cpu == this_cpu) {
		__local_flush_dcache_page(page);
	} else if (cpu_online(cpu)) {
		void *pg_addr = page_address(page);
		u64 data0 = 0;

		if (tlb_type == spitfire) {
			data0 = ((u64)&xcall_flush_dcache_page_spitfire);
			if (page_mapping(page) != NULL)
				data0 |= ((u64)1 << 32);
		} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
			data0 = ((u64)&xcall_flush_dcache_page_cheetah);
#endif
		}
		if (data0) {
			xcall_deliver(data0, __pa(pg_addr),
				      (u64) pg_addr, &cpumask_of_cpu(cpu));
#ifdef CONFIG_DEBUG_DCFLUSH
			atomic_inc(&dcpage_flushes_xcall);
#endif
		}
	}

	put_cpu();
}

void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
{
	void *pg_addr;
	int this_cpu;
	u64 data0;

	if (tlb_type == hypervisor)
		return;

	this_cpu = get_cpu();

#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif
	data0 = 0;
	pg_addr = page_address(page);
	if (tlb_type == spitfire) {
		data0 = ((u64)&xcall_flush_dcache_page_spitfire);
		if (page_mapping(page) != NULL)
			data0 |= ((u64)1 << 32);
	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
		data0 = ((u64)&xcall_flush_dcache_page_cheetah);
#endif
	}
	if (data0) {
		xcall_deliver(data0, __pa(pg_addr),
			      (u64) pg_addr, &cpu_online_map);
#ifdef CONFIG_DEBUG_DCFLUSH
		atomic_inc(&dcpage_flushes_xcall);
#endif
	}
	__local_flush_dcache_page(page);

	put_cpu();
}

void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
{
	struct mm_struct *mm;
	unsigned long flags;

	clear_softint(1 << irq);

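	/* See if we need to allocate a new TLB context because the version
	 * of the one we are using is now out of date.
	 */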
	mm = current->active_mm;
	if (unlikely(!mm || (mm == &init_mm)))
		return;

	spin_lock_irqsave(&mm->context.lock, flags);

	if (unlikely(!CTX_VALID(mm->context)))
		get_new_mmu_context(mm);

	spin_unlock_irqrestore(&mm->context.lock, flags);

	load_secondary_context(mm);
	__flush_tlb_mm(CTX_HWBITS(mm->context),
		       SECONDARY_CONTEXT);
}

void smp_new_mmu_context_version(void)
{
	smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
}

#ifdef CONFIG_KGDB
void kgdb_roundup_cpus(unsigned long flags)
{
	smp_cross_call(&xcall_kgdb_capture, 0, 0, 0);
}
#endif

void smp_fetch_global_regs(void)
{
	smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0);
}

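/* The SMP TLB coherency scheme relies on mm_cpumask(mm): it tracks which
 * cpus may hold translations for an address space, and TLB flush cross
 * calls are sent only to those cpus.  When an mm is known to be used by a
 * single thread on the local cpu, the mask is trimmed back to just that
 * cpu so no cross call is needed at all.
 */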
void smp_flush_tlb_mm(struct mm_struct *mm)
{
	u32 ctx = CTX_HWBITS(mm->context);
	int cpu = get_cpu();

	if (atomic_read(&mm->mm_users) == 1) {
		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
		goto local_flush_and_out;
	}

	smp_cross_call_masked(&xcall_flush_tlb_mm,
			      ctx, 0, 0,
			      mm_cpumask(mm));

local_flush_and_out:
	__flush_tlb_mm(ctx, SECONDARY_CONTEXT);

	put_cpu();
}

void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
{
	u32 ctx = CTX_HWBITS(mm->context);
	int cpu = get_cpu();

	if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
	else
		smp_cross_call_masked(&xcall_flush_tlb_pending,
				      ctx, nr, (unsigned long) vaddrs,
				      mm_cpumask(mm));

	__flush_tlb_pending(ctx, nr, vaddrs);

	put_cpu();
}

void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	start &= PAGE_MASK;
	end = PAGE_ALIGN(end);
	if (start != end) {
		smp_cross_call(&xcall_flush_tlb_kernel_range,
			       0, start, end);

		__flush_tlb_kernel_range(start, end);
	}
}

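/* CPU capture: smp_capture() corrals all other online cpus into a spin
 * loop (smp_penguin_jailcell below) and smp_release() lets them go again.
 * Used when the calling cpu must not race with the others, for example
 * while talking to the PROM.
 */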
extern unsigned long xcall_capture;

static atomic_t smp_capture_depth = ATOMIC_INIT(0);
static atomic_t smp_capture_registry = ATOMIC_INIT(0);
static unsigned long penguins_are_doing_time;

void smp_capture(void)
{
	int result = atomic_add_ret(1, &smp_capture_depth);

	if (result == 1) {
		int ncpus = num_online_cpus();

#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Sending penguins to jail...",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 1;
		atomic_inc(&smp_capture_registry);
		smp_cross_call(&xcall_capture, 0, 0, 0);
		while (atomic_read(&smp_capture_registry) != ncpus)
			rmb();
#ifdef CAPTURE_DEBUG
		printk("done\n");
#endif
	}
}

void smp_release(void)
{
	if (atomic_dec_and_test(&smp_capture_depth)) {
#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Giving pardon to "
		       "imprisoned penguins\n",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 0;
		membar_safe("#StoreLoad");
		atomic_dec(&smp_capture_registry);
	}
}

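/* Captured cpus enter the PROM world via prom_world(1) and spin here
 * until smp_release() clears penguins_are_doing_time.
 */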
extern void prom_world(int);

void smp_penguin_jailcell(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);

	preempt_disable();

	__asm__ __volatile__("flushw");
	prom_world(1);
	atomic_inc(&smp_capture_registry);
	membar_safe("#StoreLoad");
	while (penguins_are_doing_time)
		rmb();
	atomic_dec(&smp_capture_registry);
	prom_world(0);

	preempt_enable();
}

int setup_profiling_timer(unsigned int multiplier)
{
	return -EINVAL;
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
}

void __devinit smp_prepare_boot_cpu(void)
{
}

void __init smp_setup_processor_id(void)
{
	if (tlb_type == spitfire)
		xcall_deliver_impl = spitfire_xcall_deliver;
	else if (tlb_type == cheetah || tlb_type == cheetah_plus)
		xcall_deliver_impl = cheetah_xcall_deliver;
	else
		xcall_deliver_impl = hypervisor_xcall_deliver;
}

void __devinit smp_fill_in_sib_core_maps(void)
{
	unsigned int i;

	for_each_present_cpu(i) {
		unsigned int j;

		cpus_clear(cpu_core_map[i]);
		if (cpu_data(i).core_id == 0) {
			cpu_set(i, cpu_core_map[i]);
			continue;
		}

		for_each_present_cpu(j) {
			if (cpu_data(i).core_id ==
			    cpu_data(j).core_id)
				cpu_set(j, cpu_core_map[i]);
		}
	}

	for_each_present_cpu(i) {
		unsigned int j;

		cpus_clear(per_cpu(cpu_sibling_map, i));
		if (cpu_data(i).proc_id == -1) {
			cpu_set(i, per_cpu(cpu_sibling_map, i));
			continue;
		}

		for_each_present_cpu(j) {
			if (cpu_data(i).proc_id ==
			    cpu_data(j).proc_id)
				cpu_set(j, per_cpu(cpu_sibling_map, i));
		}
	}
}

int __cpuinit __cpu_up(unsigned int cpu)
{
	int ret = smp_boot_one_cpu(cpu);

	if (!ret) {
		cpu_set(cpu, smp_commenced_mask);
		while (!cpu_isset(cpu, cpu_online_map))
			mb();
		if (!cpu_isset(cpu, cpu_online_map)) {
			ret = -ENODEV;
		} else {
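			/* On SUN4V, writes to %tick and %stick are
			 * not allowed.
			 */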
			if (tlb_type != hypervisor)
				smp_synchronize_one_tick(cpu);
		}
	}
	return ret;
}

#ifdef CONFIG_HOTPLUG_CPU
void cpu_play_dead(void)
{
	int cpu = smp_processor_id();
	unsigned long pstate;

	idle_task_exit();

	if (tlb_type == hypervisor) {
		struct trap_per_cpu *tb = &trap_block[cpu];

		sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO,
				tb->cpu_mondo_pa, 0);
		sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO,
				tb->dev_mondo_pa, 0);
		sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR,
				tb->resum_mondo_pa, 0);
		sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR,
				tb->nonresum_mondo_pa, 0);
	}

	cpu_clear(cpu, smp_commenced_mask);
	membar_safe("#Sync");

	local_irq_disable();

	__asm__ __volatile__(
		"rdpr %%pstate, %0\n\t"
		"wrpr %0, %1, %%pstate"
		: "=r" (pstate)
		: "i" (PSTATE_IE));

	while (1)
		barrier();
}

int __cpu_disable(void)
{
	int cpu = smp_processor_id();
	cpuinfo_sparc *c;
	int i;

	for_each_cpu_mask(i, cpu_core_map[cpu])
		cpu_clear(cpu, cpu_core_map[i]);
	cpus_clear(cpu_core_map[cpu]);

	for_each_cpu_mask(i, per_cpu(cpu_sibling_map, cpu))
		cpu_clear(cpu, per_cpu(cpu_sibling_map, i));
	cpus_clear(per_cpu(cpu_sibling_map, cpu));

	c = &cpu_data(cpu);

	c->core_id = 0;
	c->proc_id = -1;

	smp_wmb();

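	/* Make sure no interrupts point to this cpu. */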
	fixup_irqs();

	local_irq_enable();
	mdelay(1);
	local_irq_disable();

	ipi_call_lock();
	cpu_clear(cpu, cpu_online_map);
	ipi_call_unlock();

	cpu_map_rebuild();

	return 0;
}

void __cpu_die(unsigned int cpu)
{
	int i;

	for (i = 0; i < 100; i++) {
		smp_rmb();
		if (!cpu_isset(cpu, smp_commenced_mask))
			break;
		msleep(100);
	}
	if (cpu_isset(cpu, smp_commenced_mask)) {
		printk(KERN_ERR "CPU %u didn't die...\n", cpu);
	} else {
#if defined(CONFIG_SUN_LDOMS)
		unsigned long hv_err;
		int limit = 100;

		do {
			hv_err = sun4v_cpu_stop(cpu);
			if (hv_err == HV_EOK) {
				cpu_clear(cpu, cpu_present_map);
				break;
			}
		} while (--limit > 0);
		if (limit <= 0) {
			printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n",
			       hv_err);
		}
#endif
	}
}
#endif

void __init smp_cpus_done(unsigned int max_cpus)
{
}

void smp_send_reschedule(int cpu)
{
	xcall_deliver((u64) &xcall_receive_signal, 0, 0,
		      &cpumask_of_cpu(cpu));
}

void smp_receive_signal_client(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);
}

void smp_send_stop(void)
{
}

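/* pcpu_alloc_bootmem() - NUMA friendly bootmem allocation for the per-cpu
 * first chunk: prefer memory on the cpu's local node when available and
 * fall back to any node otherwise.
 */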
static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
					size_t align)
{
	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
#ifdef CONFIG_NEED_MULTIPLE_NODES
	int node = cpu_to_node(cpu);
	void *ptr;

	if (!node_online(node) || !NODE_DATA(node)) {
		ptr = __alloc_bootmem(size, align, goal);
		pr_info("cpu %d has no node %d or node-local memory\n",
			cpu, node);
		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
			 cpu, size, __pa(ptr));
	} else {
		ptr = __alloc_bootmem_node(NODE_DATA(node),
					   size, align, goal);
		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
			 "%016lx\n", cpu, size, node, __pa(ptr));
	}
	return ptr;
#else
	return __alloc_bootmem(size, align, goal);
#endif
}

static void __init pcpu_free_bootmem(void *ptr, size_t size)
{
	free_bootmem(__pa(ptr), size);
}

static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
	if (cpu_to_node(from) == cpu_to_node(to))
		return LOCAL_DISTANCE;
	else
		return REMOTE_DISTANCE;
}

static void __init pcpu_populate_pte(unsigned long addr)
{
	pgd_t *pgd = pgd_offset_k(addr);
	pud_t *pud;
	pmd_t *pmd;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud)) {
		pmd_t *new;

		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
		pud_populate(&init_mm, pud, new);
	}

	pmd = pmd_offset(pud, addr);
	if (!pmd_present(*pmd)) {
		pte_t *new;

		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
		pmd_populate_kernel(&init_mm, pmd, new);
	}
}

void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc = -EINVAL;

	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
					    PERCPU_DYNAMIC_RESERVE, 4 << 20,
					    pcpu_cpu_distance,
					    pcpu_alloc_bootmem,
					    pcpu_free_bootmem);
		if (rc)
			pr_warning("PERCPU: %s allocator failed (%d), "
				   "falling back to page size\n",
				   pcpu_fc_names[pcpu_chosen_fc], rc);
	}
	if (rc < 0)
		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
					   pcpu_alloc_bootmem,
					   pcpu_free_bootmem,
					   pcpu_populate_pte);
	if (rc < 0)
		panic("cannot initialize percpu area (err=%d)", rc);

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];

	__local_per_cpu_offset = __per_cpu_offset(smp_processor_id());

	of_fill_in_cpu_data();
	if (tlb_type == hypervisor)
		mdesc_fill_in_cpu_data(cpu_all_mask);
}
1490