/* smp.c: Sparc64 SMP support. */
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/profile.h>
#include <linux/bootmem.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <linux/slab.h>

#include <asm/head.h>
#include <asm/ptrace.h>
#include <linux/atomic.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cpudata.h>
#include <asm/hvtramp.h>
#include <asm/io.h>
#include <asm/timer.h>

#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/uaccess.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
#include <asm/sections.h>
#include <asm/prom.h>
#include <asm/mdesc.h>
#include <asm/ldc.h>
#include <asm/hypervisor.h>
#include <asm/pcr.h>

#include "cpumap.h"

int sparc64_multi_core __read_mostly;

DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
	{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };

EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_SYMBOL(cpu_core_map);

static cpumask_t smp_commenced_mask;

void smp_info(struct seq_file *m)
{
	int i;

	seq_printf(m, "State:\n");
	for_each_online_cpu(i)
		seq_printf(m, "CPU%d:\t\tonline\n", i);
}

void smp_bogo(struct seq_file *m)
{
	int i;

	for_each_online_cpu(i)
		seq_printf(m,
			   "Cpu%dClkTck\t: %016lx\n",
			   i, cpu_data(i).clock_tick);
}

extern void setup_sparc64_timer(void);

static volatile unsigned long callin_flag = 0;

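/* Entry point for a freshly booted secondary cpu: establish its per-cpu
 * state, register the kernel TSBs with the hypervisor when running on
 * sun4v, start the tick timer, report in through callin_flag, then spin
 * until the boot cpu adds us to smp_commenced_mask before marking
 * ourselves online and entering the idle loop.
 */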
void __cpuinit smp_callin(void)
{
	int cpuid = hard_smp_processor_id();

	__local_per_cpu_offset = __per_cpu_offset(cpuid);

	if (tlb_type == hypervisor)
		sun4v_ktsb_register();

	__flush_tlb_all();

	setup_sparc64_timer();

	if (cheetah_pcache_forced_on)
		cheetah_enable_pcache();

	callin_flag = 1;
	__asm__ __volatile__("membar #Sync\n\t"
			     "flush %%g6" : : : "memory");

	current_thread_info()->new_child = 0;

	/* Attach to the address space of init_task. */
	atomic_inc(&init_mm.mm_count);
	current->active_mm = &init_mm;

	/* Inform the notifiers about the new cpu. */
	notify_cpu_starting(cpuid);

	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
		rmb();

	set_cpu_online(cpuid, true);
	local_irq_enable();

	/* The idle loop runs with preemption disabled. */
	preempt_disable();

	cpu_startup_entry(CPUHP_ONLINE);
}

void cpu_panic(void)
{
	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
	panic("SMP bolixed\n");
}

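/* Tick register synchronization between the boot cpu (master) and a
 * newly started cpu (slave).  The two sides rendezvous through the go[]
 * array: the slave samples its %tick around a read of the master's
 * value, keeps the round with the smallest measured delay, and applies
 * the resulting offset with tick_ops->add_tick().
 */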
#define MASTER 0
#define SLAVE (SMP_CACHE_BYTES/sizeof(unsigned long))

#define NUM_ROUNDS 64
#define NUM_ITERS 5

static DEFINE_SPINLOCK(itc_sync_lock);
static unsigned long go[SLAVE + 1];

#define DEBUG_TICK_SYNC 0

static inline long get_delta (long *rt, long *master)
{
	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
	unsigned long tcenter, t0, t1, tm;
	unsigned long i;

	for (i = 0; i < NUM_ITERS; i++) {
		t0 = tick_ops->get_tick();
		go[MASTER] = 1;
		membar_safe("#StoreLoad");
		while (!(tm = go[SLAVE]))
			rmb();
		go[SLAVE] = 0;
		wmb();
		t1 = tick_ops->get_tick();

		if (t1 - t0 < best_t1 - best_t0)
			best_t0 = t0, best_t1 = t1, best_tm = tm;
	}

	*rt = best_t1 - best_t0;
	*master = best_tm - best_t0;

	/* Average best_t0 and best_t1 without overflow. */
	tcenter = (best_t0/2 + best_t1/2);
	if (best_t0 % 2 + best_t1 % 2 == 2)
		tcenter++;
	return tcenter - best_tm;
}

void smp_synchronize_tick_client(void)
{
	long i, delta, adj, adjust_latency = 0, done = 0;
	unsigned long flags, rt, master_time_stamp;
#if DEBUG_TICK_SYNC
	struct {
		long rt;
		long master;
		long diff;
		long lat;
	} t[NUM_ROUNDS];
#endif

	go[MASTER] = 1;

	while (go[MASTER])
		rmb();

	local_irq_save(flags);
	{
		for (i = 0; i < NUM_ROUNDS; i++) {
			delta = get_delta(&rt, &master_time_stamp);
			if (delta == 0)
				done = 1;

			if (!done) {
				if (i > 0) {
					adjust_latency += -delta;
					adj = -delta + adjust_latency/4;
				} else
					adj = -delta;

				tick_ops->add_tick(adj);
			}
#if DEBUG_TICK_SYNC
			t[i].rt = rt;
			t[i].master = master_time_stamp;
			t[i].diff = delta;
			t[i].lat = adjust_latency/4;
#endif
		}
	}
	local_irq_restore(flags);

#if DEBUG_TICK_SYNC
	for (i = 0; i < NUM_ROUNDS; i++)
		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif

	printk(KERN_INFO "CPU %d: synchronized TICK with master CPU "
	       "(last diff %ld cycles, maxerr %lu cycles)\n",
	       smp_processor_id(), delta, rt);
}

static void smp_start_sync_tick_client(int cpu);

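/* Master side of the tick synchronization handshake: kick the slave
 * with a cross call, wait for it to signal readiness through go[MASTER],
 * then feed it NUM_ROUNDS*NUM_ITERS tick samples under itc_sync_lock.
 */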
static void smp_synchronize_one_tick(int cpu)
{
	unsigned long flags, i;

	go[MASTER] = 0;

	smp_start_sync_tick_client(cpu);

	/* Wait for the client to signal that it is ready. */
	while (!go[MASTER])
		rmb();

	/* Now let the client proceed into its loop. */
	go[MASTER] = 0;
	membar_safe("#StoreLoad");

	spin_lock_irqsave(&itc_sync_lock, flags);
	{
		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
			while (!go[MASTER])
				rmb();
			go[MASTER] = 0;
			wmb();
			go[SLAVE] = tick_ops->get_tick();
			membar_safe("#StoreLoad");
		}
	}
	spin_unlock_irqrestore(&itc_sync_lock, flags);
}

#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)

static unsigned long kimage_addr_to_ra(void *p)
{
	unsigned long val = (unsigned long) p;

	return kern_base + (val - KERNBASE);
}

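/* Boot a cpu inside an LDOM guest: build an hvtramp_descr describing the
 * kernel image mappings and the cpu's trap block, then hand it to the
 * hypervisor via sun4v_cpu_start() together with the real address of the
 * startup trampoline.
 */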
static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg, void **descrp)
{
	extern unsigned long sparc64_ttable_tl0;
	extern unsigned long kern_locked_tte_data;
	struct hvtramp_descr *hdesc;
	unsigned long trampoline_ra;
	struct trap_per_cpu *tb;
	u64 tte_vaddr, tte_data;
	unsigned long hv_err;
	int i;

	hdesc = kzalloc(sizeof(*hdesc) +
			(sizeof(struct hvtramp_mapping) *
			 num_kernel_image_mappings - 1),
			GFP_KERNEL);
	if (!hdesc) {
		printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
		       "hvtramp_descr.\n");
		return;
	}
	*descrp = hdesc;

	hdesc->cpu = cpu;
	hdesc->num_mappings = num_kernel_image_mappings;

	tb = &trap_block[cpu];

	hdesc->fault_info_va = (unsigned long) &tb->fault_info;
	hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);

	hdesc->thread_reg = thread_reg;

	tte_vaddr = (unsigned long) KERNBASE;
	tte_data = kern_locked_tte_data;

	for (i = 0; i < hdesc->num_mappings; i++) {
		hdesc->maps[i].vaddr = tte_vaddr;
		hdesc->maps[i].tte = tte_data;
		tte_vaddr += 0x400000;
		tte_data += 0x400000;
	}

	trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);

	hv_err = sun4v_cpu_start(cpu, trampoline_ra,
				 kimage_addr_to_ra(&sparc64_ttable_tl0),
				 __pa(hdesc));
	if (hv_err)
		printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
		       "gives error %lu\n", hv_err);
}
#endif

extern unsigned long sparc64_cpu_startup;

/* The cpu startup trampoline picks up its new thread_info pointer from
 * here; smp_boot_one_cpu() passes the address of this variable as the
 * startup cookie.
 */
static struct thread_info *cpu_new_thread = NULL;

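/* Fire up a single cpu via the appropriate mechanism (LDOM hypervisor,
 * sun4v PROM interface, or the legacy PROM call) and wait for it to
 * report in through callin_flag.
 */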
static int __cpuinit smp_boot_one_cpu(unsigned int cpu, struct task_struct *idle)
{
	unsigned long entry =
		(unsigned long)(&sparc64_cpu_startup);
	unsigned long cookie =
		(unsigned long)(&cpu_new_thread);
	void *descr = NULL;
	int timeout, ret;

	callin_flag = 0;
	cpu_new_thread = task_thread_info(idle);

	if (tlb_type == hypervisor) {
#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
		if (ldom_domaining_enabled)
			ldom_startcpu_cpuid(cpu,
					    (unsigned long) cpu_new_thread,
					    &descr);
		else
#endif
			prom_startcpu_cpuid(cpu, entry, cookie);
	} else {
		struct device_node *dp = of_find_node_by_cpuid(cpu);

		prom_startcpu(dp->phandle, entry, cookie);
	}

	for (timeout = 0; timeout < 50000; timeout++) {
		if (callin_flag)
			break;
		udelay(100);
	}

	if (callin_flag) {
		ret = 0;
	} else {
		printk("Processor %d is stuck.\n", cpu);
		ret = -ENODEV;
	}
	cpu_new_thread = NULL;

	kfree(descr);

	return ret;
}

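/* Deliver a single cross-call mondo to one cpu on spitfire/starfire by
 * programming the interrupt dispatch registers through ASI_INTR_W and
 * polling ASI_INTR_DISPATCH_STAT until the dispatch either completes or
 * NACKs, in which case we back off briefly and retry.
 */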
static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
{
	u64 result, target;
	int stuck, tmp;

	if (this_is_starfire) {
		/* Starfire remaps the cpu number for interrupt dispatch. */
		cpu = (((cpu & 0x3c) << 1) |
			((cpu & 0x40) >> 4) |
			(cpu & 0x3));
	}

	target = (cpu << 14) | 0x70;
again:
	/* Write the mondo data words and then the dispatch register, all
	 * with PSTATE_IE cleared so the sequence cannot be interrupted
	 * part way through.
	 */
	tmp = 0x40;
	__asm__ __volatile__(
		"wrpr %1, %2, %%pstate\n\t"
		"stxa %4, [%0] %3\n\t"
		"stxa %5, [%0+%8] %3\n\t"
		"add %0, %8, %0\n\t"
		"stxa %6, [%0+%8] %3\n\t"
		"membar #Sync\n\t"
		"stxa %%g0, [%7] %3\n\t"
		"membar #Sync\n\t"
		"mov 0x20, %%g1\n\t"
		"ldxa [%%g1] 0x7f, %%g0\n\t"
		"membar #Sync"
		: "=r" (tmp)
		: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
		  "r" (data0), "r" (data1), "r" (data2), "r" (target),
		  "r" (0x10), "0" (tmp)
		: "g1");

	/* NOTE: PSTATE_IE is still clear. */
	stuck = 100000;
	do {
		__asm__ __volatile__("ldxa [%%g0] %1, %0"
				     : "=r" (result)
				     : "i" (ASI_INTR_DISPATCH_STAT));
		if (result == 0) {
			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
					     : : "r" (pstate));
			return;
		}
		stuck -= 1;
		if (stuck == 0)
			break;
	} while (result & 0x1);
	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
			     : : "r" (pstate));
	if (stuck == 0) {
		printk("CPU[%d]: mondo stuckage result[%016llx]\n",
		       smp_processor_id(), result);
	} else {
		udelay(2);
		goto again;
	}
}

static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	u64 *mondo, data0, data1, data2;
	u16 *cpu_list;
	u64 pstate;
	int i;

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
	cpu_list = __va(tb->cpu_list_pa);
	mondo = __va(tb->cpu_mondo_block_pa);
	data0 = mondo[0];
	data1 = mondo[1];
	data2 = mondo[2];
	for (i = 0; i < cnt; i++)
		spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
}

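/* Cheetah (and derivatives) can have the mondo dispatched to multiple
 * cpus at once: write the data words once, queue up to 32 targets in
 * the dispatch register, then watch the busy/nack bits in
 * ASI_INTR_DISPATCH_STAT and resend to any cpus that NACKed us.
 */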
static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	int nack_busy_id, is_jbus, need_more;
	u64 *mondo, pstate, ver, busy_mask;
	u16 *cpu_list;

	cpu_list = __va(tb->cpu_list_pa);
	mondo = __va(tb->cpu_mondo_block_pa);

	/* JBUS parts (Jalapeno/Serrano) index the busy/nack bits by cpu
	 * number instead of by dispatch slot.
	 */
	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
	is_jbus = ((ver >> 32) == __JALAPENO_ID ||
		   (ver >> 32) == __SERRANO_ID);

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

retry:
	need_more = 0;
	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
			     : : "r" (pstate), "i" (PSTATE_IE));

	/* Set up the mondo data registers. */
	__asm__ __volatile__("stxa %0, [%3] %6\n\t"
			     "stxa %1, [%4] %6\n\t"
			     "stxa %2, [%5] %6\n\t"
			     "membar #Sync\n\t"
			     :
			     : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
			       "r" (0x40), "r" (0x50), "r" (0x60),
			       "i" (ASI_INTR_W));

	nack_busy_id = 0;
	busy_mask = 0;
	{
		int i;

		for (i = 0; i < cnt; i++) {
			u64 target, nr;

			nr = cpu_list[i];
			if (nr == 0xffff)
				continue;

			target = (nr << 14) | 0x70;
			if (is_jbus) {
				busy_mask |= (0x1UL << (nr * 2));
			} else {
				target |= (nack_busy_id << 24);
				busy_mask |= (0x1UL <<
					      (nack_busy_id * 2));
			}
			__asm__ __volatile__(
				"stxa %%g0, [%0] %1\n\t"
				"membar #Sync\n\t"
				:
				: "r" (target), "i" (ASI_INTR_W));
			nack_busy_id++;
			if (nack_busy_id == 32) {
				need_more = 1;
				break;
			}
		}
	}

	/* Now poll the dispatch status register for completion. */
	{
		u64 dispatch_stat, nack_mask;
		long stuck;

		stuck = 100000 * nack_busy_id;
		nack_mask = busy_mask << 1;
		do {
			__asm__ __volatile__("ldxa [%%g0] %1, %0"
					     : "=r" (dispatch_stat)
					     : "i" (ASI_INTR_DISPATCH_STAT));
			if (!(dispatch_stat & (busy_mask | nack_mask))) {
				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
						     : : "r" (pstate));
				if (unlikely(need_more)) {
					int i, this_cnt = 0;
					for (i = 0; i < cnt; i++) {
						if (cpu_list[i] == 0xffff)
							continue;
						cpu_list[i] = 0xffff;
						this_cnt++;
						if (this_cnt == 32)
							break;
					}
					goto retry;
				}
				return;
			}
			if (!--stuck)
				break;
		} while (dispatch_stat & busy_mask);

		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
				     : : "r" (pstate));

		if (dispatch_stat & busy_mask) {
			/* The busy bits never cleared; report it rather
			 * than spinning here forever.
			 */
			printk("CPU[%d]: mondo stuckage result[%016llx]\n",
			       smp_processor_id(), dispatch_stat);
		} else {
			int i, this_busy_nack = 0;

			/* Some cpus NACKed us; give them a moment and
			 * then resend the mondo to just those cpus.
			 */
			udelay(2 * nack_busy_id);

			/* Clear the list entries of cpus which accepted
			 * the mondo, so the retry only targets the ones
			 * that NACKed.
			 */
			for (i = 0; i < cnt; i++) {
				u64 check_mask, nr;

				nr = cpu_list[i];
				if (nr == 0xffff)
					continue;

				if (is_jbus)
					check_mask = (0x2UL << (2*nr));
				else
					check_mask = (0x2UL <<
						      this_busy_nack);
				if ((dispatch_stat & check_mask) == 0)
					cpu_list[i] = 0xffff;
				this_busy_nack += 2;
				if (this_busy_nack == 64)
					break;
			}

			goto retry;
		}
	}
}

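/* Multi-cpu list version, used on sun4v: the hypervisor is handed the
 * physical addresses of the cpu list and mondo block directly.  Cpu list
 * entries that come back as 0xffff are treated as delivered, so on
 * partial sends we retry the remainder, dropping cpus that have entered
 * the error state and bailing out if we stop making forward progress.
 */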
static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	int retries, this_cpu, prev_sent, i, saw_cpu_error;
	unsigned long status;
	u16 *cpu_list;

	this_cpu = smp_processor_id();

	cpu_list = __va(tb->cpu_list_pa);

	saw_cpu_error = 0;
	retries = 0;
	prev_sent = 0;
	do {
		int forward_progress, n_sent;

		status = sun4v_cpu_mondo_send(cnt,
					      tb->cpu_list_pa,
					      tb->cpu_mondo_block_pa);

		/* HV_EOK means all cpus received the xcall, we're done. */
		if (likely(status == HV_EOK))
			break;

		/* Otherwise see whether we made any forward progress.
		 * The hypervisor indicates successful delivery by setting
		 * the corresponding cpu list entry to 0xffff.
		 */
		n_sent = 0;
		for (i = 0; i < cnt; i++) {
			if (likely(cpu_list[i] == 0xffff))
				n_sent++;
		}

		forward_progress = 0;
		if (n_sent > prev_sent)
			forward_progress = 1;

		prev_sent = n_sent;

		/* If we get a HV_ECPUERROR, one or more of the remaining
		 * cpus is in the error state; find out which ones and
		 * drop them from the list.
		 */
		if (unlikely(status == HV_ECPUERROR)) {
			for (i = 0; i < cnt; i++) {
				long err;
				u16 cpu;

				cpu = cpu_list[i];
				if (cpu == 0xffff)
					continue;

				err = sun4v_cpu_state(cpu);
				if (err == HV_CPU_STATE_ERROR) {
					saw_cpu_error = (cpu + 1);
					cpu_list[i] = 0xffff;
				}
			}
		} else if (unlikely(status != HV_EWOULDBLOCK))
			goto fatal_mondo_error;

		/* Without forward progress, only keep retrying for a
		 * bounded number of attempts before declaring a timeout.
		 */
		if (unlikely(!forward_progress)) {
			if (unlikely(++retries > 10000))
				goto fatal_mondo_timeout;

			/* Give the target cpus a little time to drain
			 * their cpu mondo queues before we retry.
			 */
			udelay(2 * cnt);
		}
	} while (1);

	if (unlikely(saw_cpu_error))
		goto fatal_mondo_cpu_error;

	return;

fatal_mondo_cpu_error:
	printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
	       "(including %d) were in error state\n",
	       this_cpu, saw_cpu_error - 1);
	return;

fatal_mondo_timeout:
	printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
	       "progress after %d retries.\n",
	       this_cpu, retries);
	goto dump_cpu_list_and_out;

fatal_mondo_error:
	printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
	       this_cpu, status);
	printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
	       "mondo_block_pa(%lx)\n",
	       this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);

dump_cpu_list_and_out:
	printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
	for (i = 0; i < cnt; i++)
		printk("%u ", cpu_list[i]);
	printk("]\n");
}

static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);

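/* Common cross-call entry point: build the mondo data words and target
 * cpu list in this cpu's trap block and hand them to the per-platform
 * delivery routine.  Interrupts stay disabled for the whole sequence,
 * otherwise a nested xcall from interrupt context would clobber our
 * mondo block and cpu list.
 */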
static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
{
	struct trap_per_cpu *tb;
	int this_cpu, i, cnt;
	unsigned long flags;
	u16 *cpu_list;
	u64 *mondo;

	local_irq_save(flags);

	this_cpu = smp_processor_id();
	tb = &trap_block[this_cpu];

	mondo = __va(tb->cpu_mondo_block_pa);
	mondo[0] = data0;
	mondo[1] = data1;
	mondo[2] = data2;
	wmb();

	cpu_list = __va(tb->cpu_list_pa);

	/* Set up the initial cpu list, excluding ourselves and any cpus
	 * that are not online.
	 */
	cnt = 0;
	for_each_cpu(i, mask) {
		if (i == this_cpu || !cpu_online(i))
			continue;
		cpu_list[cnt++] = i;
	}

	if (cnt)
		xcall_deliver_impl(tb, cnt);

	local_irq_restore(flags);
}

/* Send a cross call to every online cpu in the mask, except self. */
static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
{
	u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));

	xcall_deliver(data0, data1, data2, mask);
}

/* Send a cross call to all online cpus except self. */
static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
{
	smp_cross_call_masked(func, ctx, data1, data2, cpu_online_mask);
}

extern unsigned long xcall_sync_tick;

static void smp_start_sync_tick_client(int cpu)
{
	xcall_deliver((u64) &xcall_sync_tick, 0, 0,
		      cpumask_of(cpu));
}

extern unsigned long xcall_call_function;

void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	xcall_deliver((u64) &xcall_call_function, 0, 0, mask);
}

extern unsigned long xcall_call_function_single;

void arch_send_call_function_single_ipi(int cpu)
{
	xcall_deliver((u64) &xcall_call_function_single, 0, 0,
		      cpumask_of(cpu));
}

void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);
	generic_smp_call_function_interrupt();
}

void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);
	generic_smp_call_function_single_interrupt();
}

static void tsb_sync(void *info)
{
	struct trap_per_cpu *tp = &trap_block[raw_smp_processor_id()];
	struct mm_struct *mm = info;

	/* Only switch TSB state if this cpu really has mm's page tables
	 * loaded; the trap block's pgd_paddr is what the trap handlers
	 * use, so test that rather than current->active_mm.
	 */
	if (tp->pgd_paddr == __pa(mm->pgd))
		tsb_context_switch(mm);
}

void smp_tsb_sync(struct mm_struct *mm)
{
	smp_call_function_many(mm_cpumask(mm), tsb_sync, mm, 1);
}

extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_page;
extern unsigned long xcall_flush_tlb_kernel_range;
extern unsigned long xcall_fetch_glob_regs;
extern unsigned long xcall_fetch_glob_pmu;
extern unsigned long xcall_fetch_glob_pmu_n4;
extern unsigned long xcall_receive_signal;
extern unsigned long xcall_new_mmu_context_version;
#ifdef CONFIG_KGDB
extern unsigned long xcall_kgdb_capture;
#endif

#ifdef DCACHE_ALIASING_POSSIBLE
extern unsigned long xcall_flush_dcache_page_cheetah;
#endif
extern unsigned long xcall_flush_dcache_page_spitfire;

#ifdef CONFIG_DEBUG_DCFLUSH
extern atomic_t dcpage_flushes;
extern atomic_t dcpage_flushes_xcall;
#endif

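/* D-cache page flushing.  On sun4v (tlb_type == hypervisor) no flush is
 * needed and these routines return immediately; on spitfire and cheetah
 * we flush locally and, when the page may be cached elsewhere, cross
 * call the owning cpu or all online cpus.
 */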
static inline void __local_flush_dcache_page(struct page *page)
{
#ifdef DCACHE_ALIASING_POSSIBLE
	__flush_dcache_page(page_address(page),
			    ((tlb_type == spitfire) &&
			     page_mapping(page) != NULL));
#else
	if (page_mapping(page) != NULL &&
	    tlb_type == spitfire)
		__flush_icache_page(__pa(page_address(page)));
#endif
}

void smp_flush_dcache_page_impl(struct page *page, int cpu)
{
	int this_cpu;

	if (tlb_type == hypervisor)
		return;

#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif

	this_cpu = get_cpu();

	if (cpu == this_cpu) {
		__local_flush_dcache_page(page);
	} else if (cpu_online(cpu)) {
		void *pg_addr = page_address(page);
		u64 data0 = 0;

		if (tlb_type == spitfire) {
			data0 = ((u64)&xcall_flush_dcache_page_spitfire);
			if (page_mapping(page) != NULL)
				data0 |= ((u64)1 << 32);
		} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
			data0 = ((u64)&xcall_flush_dcache_page_cheetah);
#endif
		}
		if (data0) {
			xcall_deliver(data0, __pa(pg_addr),
				      (u64) pg_addr, cpumask_of(cpu));
#ifdef CONFIG_DEBUG_DCFLUSH
			atomic_inc(&dcpage_flushes_xcall);
#endif
		}
	}

	put_cpu();
}

void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
{
	void *pg_addr;
	u64 data0;

	if (tlb_type == hypervisor)
		return;

	preempt_disable();

#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif
	data0 = 0;
	pg_addr = page_address(page);
	if (tlb_type == spitfire) {
		data0 = ((u64)&xcall_flush_dcache_page_spitfire);
		if (page_mapping(page) != NULL)
			data0 |= ((u64)1 << 32);
	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
		data0 = ((u64)&xcall_flush_dcache_page_cheetah);
#endif
	}
	if (data0) {
		xcall_deliver(data0, __pa(pg_addr),
			      (u64) pg_addr, cpu_online_mask);
#ifdef CONFIG_DEBUG_DCFLUSH
		atomic_inc(&dcpage_flushes_xcall);
#endif
	}
	__local_flush_dcache_page(page);

	preempt_enable();
}

void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
{
	struct mm_struct *mm;
	unsigned long flags;

	clear_softint(1 << irq);

	/* See if we need to allocate a new TLB context because the
	 * version of the one we are using is now out of date.
	 */
	mm = current->active_mm;
	if (unlikely(!mm || (mm == &init_mm)))
		return;

	spin_lock_irqsave(&mm->context.lock, flags);

	if (unlikely(!CTX_VALID(mm->context)))
		get_new_mmu_context(mm);

	spin_unlock_irqrestore(&mm->context.lock, flags);

	load_secondary_context(mm);
	__flush_tlb_mm(CTX_HWBITS(mm->context),
		       SECONDARY_CONTEXT);
}

void smp_new_mmu_context_version(void)
{
	smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
}

#ifdef CONFIG_KGDB
void kgdb_roundup_cpus(unsigned long flags)
{
	smp_cross_call(&xcall_kgdb_capture, 0, 0, 0);
}
#endif

void smp_fetch_global_regs(void)
{
	smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0);
}

void smp_fetch_global_pmu(void)
{
	if (tlb_type == hypervisor &&
	    sun4v_chip_type >= SUN4V_CHIP_NIAGARA4)
		smp_cross_call(&xcall_fetch_glob_pmu_n4, 0, 0, 0);
	else
		smp_cross_call(&xcall_fetch_glob_pmu, 0, 0, 0);
}

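/* TLB shootdown.  Each smp_flush_tlb_*() variant does the local flush
 * itself and cross calls the cpus in mm_cpumask(mm) (or all online cpus
 * for kernel ranges).  When the mm has a single user and we are running
 * on it, the cpumask is trimmed to just this cpu so no cross call is
 * needed.
 */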
void smp_flush_tlb_mm(struct mm_struct *mm)
{
	u32 ctx = CTX_HWBITS(mm->context);
	int cpu = get_cpu();

	if (atomic_read(&mm->mm_users) == 1) {
		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
		goto local_flush_and_out;
	}

	smp_cross_call_masked(&xcall_flush_tlb_mm,
			      ctx, 0, 0,
			      mm_cpumask(mm));

local_flush_and_out:
	__flush_tlb_mm(ctx, SECONDARY_CONTEXT);

	put_cpu();
}

struct tlb_pending_info {
	unsigned long ctx;
	unsigned long nr;
	unsigned long *vaddrs;
};

static void tlb_pending_func(void *info)
{
	struct tlb_pending_info *t = info;

	__flush_tlb_pending(t->ctx, t->nr, t->vaddrs);
}

void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
{
	u32 ctx = CTX_HWBITS(mm->context);
	struct tlb_pending_info info;
	int cpu = get_cpu();

	info.ctx = ctx;
	info.nr = nr;
	info.vaddrs = vaddrs;

	if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
	else
		smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
				       &info, 1);

	__flush_tlb_pending(ctx, nr, vaddrs);

	put_cpu();
}

void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr)
{
	unsigned long context = CTX_HWBITS(mm->context);
	int cpu = get_cpu();

	if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
	else
		smp_cross_call_masked(&xcall_flush_tlb_page,
				      context, vaddr, 0,
				      mm_cpumask(mm));
	__flush_tlb_page(context, vaddr);

	put_cpu();
}

void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	start &= PAGE_MASK;
	end = PAGE_ALIGN(end);
	if (start != end) {
		smp_cross_call(&xcall_flush_tlb_kernel_range,
			       0, start, end);

		__flush_tlb_kernel_range(start, end);
	}
}

extern unsigned long xcall_capture;

static atomic_t smp_capture_depth = ATOMIC_INIT(0);
static atomic_t smp_capture_registry = ATOMIC_INIT(0);
static unsigned long penguins_are_doing_time;

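/* CPU capture: park every other online cpu in smp_penguin_jailcell()
 * (spinning in the PROM's world) until smp_release() lets them go.
 * Nesting is handled with smp_capture_depth, and smp_capture_registry
 * counts the cpus that have checked in.
 */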
void smp_capture(void)
{
	int result = atomic_add_ret(1, &smp_capture_depth);

	if (result == 1) {
		int ncpus = num_online_cpus();

#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Sending penguins to jail...",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 1;
		atomic_inc(&smp_capture_registry);
		smp_cross_call(&xcall_capture, 0, 0, 0);
		while (atomic_read(&smp_capture_registry) != ncpus)
			rmb();
#ifdef CAPTURE_DEBUG
		printk("done\n");
#endif
	}
}

void smp_release(void)
{
	if (atomic_dec_and_test(&smp_capture_depth)) {
#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Giving pardon to "
		       "imprisoned penguins\n",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 0;
		membar_safe("#StoreLoad");
		atomic_dec(&smp_capture_registry);
	}
}

extern void prom_world(int);

void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);

	preempt_disable();

	__asm__ __volatile__("flushw");
	prom_world(1);
	atomic_inc(&smp_capture_registry);
	membar_safe("#StoreLoad");
	while (penguins_are_doing_time)
		rmb();
	atomic_dec(&smp_capture_registry);
	prom_world(0);

	preempt_enable();
}

int setup_profiling_timer(unsigned int multiplier)
{
	return -EINVAL;
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
}

void smp_prepare_boot_cpu(void)
{
}

void __init smp_setup_processor_id(void)
{
	if (tlb_type == spitfire)
		xcall_deliver_impl = spitfire_xcall_deliver;
	else if (tlb_type == cheetah || tlb_type == cheetah_plus)
		xcall_deliver_impl = cheetah_xcall_deliver;
	else
		xcall_deliver_impl = hypervisor_xcall_deliver;
}

void smp_fill_in_sib_core_maps(void)
{
	unsigned int i;

	for_each_present_cpu(i) {
		unsigned int j;

		cpumask_clear(&cpu_core_map[i]);
		if (cpu_data(i).core_id == 0) {
			cpumask_set_cpu(i, &cpu_core_map[i]);
			continue;
		}

		for_each_present_cpu(j) {
			if (cpu_data(i).core_id ==
			    cpu_data(j).core_id)
				cpumask_set_cpu(j, &cpu_core_map[i]);
		}
	}

	for_each_present_cpu(i) {
		unsigned int j;

		cpumask_clear(&per_cpu(cpu_sibling_map, i));
		if (cpu_data(i).proc_id == -1) {
			cpumask_set_cpu(i, &per_cpu(cpu_sibling_map, i));
			continue;
		}

		for_each_present_cpu(j) {
			if (cpu_data(i).proc_id ==
			    cpu_data(j).proc_id)
				cpumask_set_cpu(j, &per_cpu(cpu_sibling_map, i));
		}
	}
}

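/* Bring up one cpu with its already-created idle task, wait for it to
 * come online, and, except on sun4v, synchronize its tick register with
 * the boot cpu.
 */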
int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int ret = smp_boot_one_cpu(cpu, tidle);

	if (!ret) {
		cpumask_set_cpu(cpu, &smp_commenced_mask);
		while (!cpu_online(cpu))
			mb();
		if (!cpu_online(cpu)) {
			ret = -ENODEV;
		} else {
			if (tlb_type != hypervisor)
				smp_synchronize_one_tick(cpu);
		}
	}
	return ret;
}

#ifdef CONFIG_HOTPLUG_CPU
/* Final resting place of an offlined cpu: unconfigure its mondo queues
 * on sun4v, drop out of smp_commenced_mask, then spin with interrupts
 * disabled until the cpu is stopped.
 */
void cpu_play_dead(void)
{
	int cpu = smp_processor_id();
	unsigned long pstate;

	idle_task_exit();

	if (tlb_type == hypervisor) {
		struct trap_per_cpu *tb = &trap_block[cpu];

		sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO,
				tb->cpu_mondo_pa, 0);
		sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO,
				tb->dev_mondo_pa, 0);
		sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR,
				tb->resum_mondo_pa, 0);
		sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR,
				tb->nonresum_mondo_pa, 0);
	}

	cpumask_clear_cpu(cpu, &smp_commenced_mask);
	membar_safe("#Sync");

	local_irq_disable();

	__asm__ __volatile__(
		"rdpr %%pstate, %0\n\t"
		"wrpr %0, %1, %%pstate"
		: "=r" (pstate)
		: "i" (PSTATE_IE));

	while (1)
		barrier();
}

int __cpu_disable(void)
{
	int cpu = smp_processor_id();
	cpuinfo_sparc *c;
	int i;

	for_each_cpu(i, &cpu_core_map[cpu])
		cpumask_clear_cpu(cpu, &cpu_core_map[i]);
	cpumask_clear(&cpu_core_map[cpu]);

	for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu))
		cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i));
	cpumask_clear(&per_cpu(cpu_sibling_map, cpu));

	c = &cpu_data(cpu);

	c->core_id = 0;
	c->proc_id = -1;

	smp_wmb();

	/* Make sure no interrupts point to this cpu. */
	fixup_irqs();

	local_irq_enable();
	mdelay(1);
	local_irq_disable();

	set_cpu_online(cpu, false);

	cpu_map_rebuild();

	return 0;
}

void __cpu_die(unsigned int cpu)
{
	int i;

	for (i = 0; i < 100; i++) {
		smp_rmb();
		if (!cpumask_test_cpu(cpu, &smp_commenced_mask))
			break;
		msleep(100);
	}
	if (cpumask_test_cpu(cpu, &smp_commenced_mask)) {
		printk(KERN_ERR "CPU %u didn't die...\n", cpu);
	} else {
#if defined(CONFIG_SUN_LDOMS)
		unsigned long hv_err;
		int limit = 100;

		do {
			hv_err = sun4v_cpu_stop(cpu);
			if (hv_err == HV_EOK) {
				set_cpu_present(cpu, false);
				break;
			}
		} while (--limit > 0);
		if (limit <= 0) {
			printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n",
			       hv_err);
		}
#endif
	}
}
#endif

void __init smp_cpus_done(unsigned int max_cpus)
{
	pcr_arch_init();
}

void smp_send_reschedule(int cpu)
{
	xcall_deliver((u64) &xcall_receive_signal, 0, 0,
		      cpumask_of(cpu));
}

void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);
	scheduler_ipi();
}

void smp_send_stop(void)
{
}

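/* Bootmem-based allocation for the percpu first chunk.  When NUMA is
 * configured we try to allocate from the cpu's own node and fall back
 * to any node if node-local memory is unavailable.
 */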
static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
					size_t align)
{
	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
#ifdef CONFIG_NEED_MULTIPLE_NODES
	int node = cpu_to_node(cpu);
	void *ptr;

	if (!node_online(node) || !NODE_DATA(node)) {
		ptr = __alloc_bootmem(size, align, goal);
		pr_info("cpu %d has no node %d or node-local memory\n",
			cpu, node);
		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
			 cpu, size, __pa(ptr));
	} else {
		ptr = __alloc_bootmem_node(NODE_DATA(node),
					   size, align, goal);
		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
			 "%016lx\n", cpu, size, node, __pa(ptr));
	}
	return ptr;
#else
	return __alloc_bootmem(size, align, goal);
#endif
}

static void __init pcpu_free_bootmem(void *ptr, size_t size)
{
	free_bootmem(__pa(ptr), size);
}

static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
	if (cpu_to_node(from) == cpu_to_node(to))
		return LOCAL_DISTANCE;
	else
		return REMOTE_DISTANCE;
}

static void __init pcpu_populate_pte(unsigned long addr)
{
	pgd_t *pgd = pgd_offset_k(addr);
	pud_t *pud;
	pmd_t *pmd;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud)) {
		pmd_t *new;

		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
		pud_populate(&init_mm, pud, new);
	}

	pmd = pmd_offset(pud, addr);
	if (!pmd_present(*pmd)) {
		pte_t *new;

		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
		pmd_populate_kernel(&init_mm, pmd, new);
	}
}

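/* Set up the percpu areas: try the embed first-chunk allocator and fall
 * back to the page allocator, then compute each cpu's offset and fill
 * in the cpu data from the OF tree or the machine description.
 */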
void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc = -EINVAL;

	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
					    PERCPU_DYNAMIC_RESERVE, 4 << 20,
					    pcpu_cpu_distance,
					    pcpu_alloc_bootmem,
					    pcpu_free_bootmem);
		if (rc)
			pr_warning("PERCPU: %s allocator failed (%d), "
				   "falling back to page size\n",
				   pcpu_fc_names[pcpu_chosen_fc], rc);
	}
	if (rc < 0)
		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
					   pcpu_alloc_bootmem,
					   pcpu_free_bootmem,
					   pcpu_populate_pte);
	if (rc < 0)
		panic("cannot initialize percpu area (err=%d)", rc);

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];

	/* Point the boot cpu at its own percpu area. */
	__local_per_cpu_offset = __per_cpu_offset(smp_processor_id());

	of_fill_in_cpu_data();
	if (tlb_type == hypervisor)
		mdesc_fill_in_cpu_data(cpu_all_mask);
}