/* smp.c: sparc64 SMP support. */

#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/profile.h>
#include <linux/bootmem.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <linux/slab.h>

#include <asm/head.h>
#include <asm/ptrace.h>
#include <linux/atomic.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cpudata.h>
#include <asm/hvtramp.h>
#include <asm/io.h>
#include <asm/timer.h>

#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/uaccess.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
#include <asm/sections.h>
#include <asm/prom.h>
#include <asm/mdesc.h>
#include <asm/ldc.h>
#include <asm/hypervisor.h>
#include <asm/pcr.h>

#include "cpumap.h"

int sparc64_multi_core __read_mostly;

DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
	{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };

EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_SYMBOL(cpu_core_map);

static cpumask_t smp_commenced_mask;

void smp_info(struct seq_file *m)
{
	int i;

	seq_printf(m, "State:\n");
	for_each_online_cpu(i)
		seq_printf(m, "CPU%d:\t\tonline\n", i);
}

void smp_bogo(struct seq_file *m)
{
	int i;

	for_each_online_cpu(i)
		seq_printf(m,
			   "Cpu%dClkTck\t: %016lx\n",
			   i, cpu_data(i).clock_tick);
}

extern void setup_sparc64_timer(void);

static volatile unsigned long callin_flag = 0;

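/* Entry point for a freshly started secondary CPU: set up its per-cpu
 * data, timer and MMU state, report in via callin_flag, then wait to be
 * released into the idle loop by the boot processor.
 */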
void smp_callin(void)
{
	int cpuid = hard_smp_processor_id();

	__local_per_cpu_offset = __per_cpu_offset(cpuid);

	if (tlb_type == hypervisor)
		sun4v_ktsb_register();

	__flush_tlb_all();

	setup_sparc64_timer();

	if (cheetah_pcache_forced_on)
		cheetah_enable_pcache();

	callin_flag = 1;
	__asm__ __volatile__("membar #Sync\n\t"
			     "flush %%g6" : : : "memory");

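	/* Clear the new_child flag so the first reschedule of this idler
	 * does not take the freshly-forked-child return path.
	 */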
	current_thread_info()->new_child = 0;

	atomic_inc(&init_mm.mm_count);
	current->active_mm = &init_mm;

	notify_cpu_starting(cpuid);

	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
		rmb();

	set_cpu_online(cpuid, true);
	local_irq_enable();

	preempt_disable();

	cpu_startup_entry(CPUHP_ONLINE);
}

void cpu_panic(void)
{
	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
	panic("SMP bolixed\n");
}

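/* Simple master/slave tick-register synchronization: the slave samples its
 * tick around a handshake with the master through the go[] flags, keeps
 * the iteration with the tightest bound, and nudges its tick register
 * toward the master's value over NUM_ROUNDS rounds.
 */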
#define MASTER	0
#define SLAVE	(SMP_CACHE_BYTES/sizeof(unsigned long))

#define NUM_ROUNDS	64
#define NUM_ITERS	5

static DEFINE_SPINLOCK(itc_sync_lock);
static unsigned long go[SLAVE + 1];

#define DEBUG_TICK_SYNC	0

static inline long get_delta (long *rt, long *master)
{
	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
	unsigned long tcenter, t0, t1, tm;
	unsigned long i;

	for (i = 0; i < NUM_ITERS; i++) {
		t0 = tick_ops->get_tick();
		go[MASTER] = 1;
		membar_safe("#StoreLoad");
		while (!(tm = go[SLAVE]))
			rmb();
		go[SLAVE] = 0;
		wmb();
		t1 = tick_ops->get_tick();

		if (t1 - t0 < best_t1 - best_t0)
			best_t0 = t0, best_t1 = t1, best_tm = tm;
	}

	*rt = best_t1 - best_t0;
	*master = best_tm - best_t0;

	tcenter = (best_t0/2 + best_t1/2);
	if (best_t0 % 2 + best_t1 % 2 == 2)
		tcenter++;
	return tcenter - best_tm;
}

void smp_synchronize_tick_client(void)
{
	long i, delta, adj, adjust_latency = 0, done = 0;
	unsigned long flags, rt, master_time_stamp;
#if DEBUG_TICK_SYNC
	struct {
		long rt;
		long master;
		long diff;
		long lat;
	} t[NUM_ROUNDS];
#endif

	go[MASTER] = 1;

	while (go[MASTER])
		rmb();

	local_irq_save(flags);
	{
		for (i = 0; i < NUM_ROUNDS; i++) {
			delta = get_delta(&rt, &master_time_stamp);
			if (delta == 0)
				done = 1;

			if (!done) {
				if (i > 0) {
					adjust_latency += -delta;
					adj = -delta + adjust_latency/4;
				} else
					adj = -delta;

				tick_ops->add_tick(adj);
			}
#if DEBUG_TICK_SYNC
			t[i].rt = rt;
			t[i].master = master_time_stamp;
			t[i].diff = delta;
			t[i].lat = adjust_latency/4;
#endif
		}
	}
	local_irq_restore(flags);

#if DEBUG_TICK_SYNC
	for (i = 0; i < NUM_ROUNDS; i++)
		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif

	printk(KERN_INFO "CPU %d: synchronized TICK with master CPU "
	       "(last diff %ld cycles, maxerr %lu cycles)\n",
	       smp_processor_id(), delta, rt);
}

static void smp_start_sync_tick_client(int cpu);

static void smp_synchronize_one_tick(int cpu)
{
	unsigned long flags, i;

	go[MASTER] = 0;

	smp_start_sync_tick_client(cpu);

	while (!go[MASTER])
		rmb();

	go[MASTER] = 0;
	membar_safe("#StoreLoad");

	spin_lock_irqsave(&itc_sync_lock, flags);
	{
		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
			while (!go[MASTER])
				rmb();
			go[MASTER] = 0;
			wmb();
			go[SLAVE] = tick_ops->get_tick();
			membar_safe("#StoreLoad");
		}
	}
	spin_unlock_irqrestore(&itc_sync_lock, flags);
}

#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)

static unsigned long kimage_addr_to_ra(void *p)
{
	unsigned long val = (unsigned long) p;

	return kern_base + (val - KERNBASE);
}

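/* Start a CPU on a sun4v logical domain: build an hvtramp descriptor
 * describing the kernel image mappings and the initial thread, then ask
 * the hypervisor to start the CPU at the hv_cpu_startup trampoline.
 */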
static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg,
				void **descrp)
{
	extern unsigned long sparc64_ttable_tl0;
	extern unsigned long kern_locked_tte_data;
	struct hvtramp_descr *hdesc;
	unsigned long trampoline_ra;
	struct trap_per_cpu *tb;
	u64 tte_vaddr, tte_data;
	unsigned long hv_err;
	int i;

	hdesc = kzalloc(sizeof(*hdesc) +
			(sizeof(struct hvtramp_mapping) *
			 num_kernel_image_mappings - 1),
			GFP_KERNEL);
	if (!hdesc) {
		printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
		       "hvtramp_descr.\n");
		return;
	}
	*descrp = hdesc;

	hdesc->cpu = cpu;
	hdesc->num_mappings = num_kernel_image_mappings;

	tb = &trap_block[cpu];

	hdesc->fault_info_va = (unsigned long) &tb->fault_info;
	hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);

	hdesc->thread_reg = thread_reg;

	tte_vaddr = (unsigned long) KERNBASE;
	tte_data = kern_locked_tte_data;

	for (i = 0; i < hdesc->num_mappings; i++) {
		hdesc->maps[i].vaddr = tte_vaddr;
		hdesc->maps[i].tte = tte_data;
		tte_vaddr += 0x400000;
		tte_data += 0x400000;
	}

	trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);

	hv_err = sun4v_cpu_start(cpu, trampoline_ra,
				 kimage_addr_to_ra(&sparc64_ttable_tl0),
				 __pa(hdesc));
	if (hv_err)
		printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
		       "gives error %lu\n", hv_err);
}
#endif

extern unsigned long sparc64_cpu_startup;

static struct thread_info *cpu_new_thread = NULL;

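/* Boot one secondary CPU: point cpu_new_thread at the idle task's
 * thread_info, kick the CPU via the hypervisor or OBP, then poll
 * callin_flag until the new CPU reports in (or give up).
 */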
static int smp_boot_one_cpu(unsigned int cpu, struct task_struct *idle)
{
	unsigned long entry =
		(unsigned long)(&sparc64_cpu_startup);
	unsigned long cookie =
		(unsigned long)(&cpu_new_thread);
	void *descr = NULL;
	int timeout, ret;

	callin_flag = 0;
	cpu_new_thread = task_thread_info(idle);

	if (tlb_type == hypervisor) {
#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
		if (ldom_domaining_enabled)
			ldom_startcpu_cpuid(cpu,
					    (unsigned long) cpu_new_thread,
					    &descr);
		else
#endif
			prom_startcpu_cpuid(cpu, entry, cookie);
	} else {
		struct device_node *dp = of_find_node_by_cpuid(cpu);

		prom_startcpu(dp->phandle, entry, cookie);
	}

	for (timeout = 0; timeout < 50000; timeout++) {
		if (callin_flag)
			break;
		udelay(100);
	}

	if (callin_flag) {
		ret = 0;
	} else {
		printk("Processor %d is stuck.\n", cpu);
		ret = -ENODEV;
	}
	cpu_new_thread = NULL;

	kfree(descr);

	return ret;
}

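/* On Spitfire the cross-call (mondo) vector is dispatched to one target
 * CPU at a time: write the three data words and the target id into the
 * interrupt-dispatch ASI, then poll the dispatch status register and
 * retry if the send was NACKed.
 */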
static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
{
	u64 result, target;
	int stuck, tmp;

	if (this_is_starfire) {
		cpu = (((cpu & 0x3c) << 1) |
			((cpu & 0x40) >> 4) |
			(cpu & 0x3));
	}

	target = (cpu << 14) | 0x70;
again:
	tmp = 0x40;
	__asm__ __volatile__(
		"wrpr %1, %2, %%pstate\n\t"
		"stxa %4, [%0] %3\n\t"
		"stxa %5, [%0+%8] %3\n\t"
		"add %0, %8, %0\n\t"
		"stxa %6, [%0+%8] %3\n\t"
		"membar #Sync\n\t"
		"stxa %%g0, [%7] %3\n\t"
		"membar #Sync\n\t"
		"mov 0x20, %%g1\n\t"
		"ldxa [%%g1] 0x7f, %%g0\n\t"
		"membar #Sync"
		: "=r" (tmp)
		: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
		  "r" (data0), "r" (data1), "r" (data2), "r" (target),
		  "r" (0x10), "0" (tmp)
		: "g1");

	stuck = 100000;
	do {
		__asm__ __volatile__("ldxa [%%g0] %1, %0"
				     : "=r" (result)
				     : "i" (ASI_INTR_DISPATCH_STAT));
		if (result == 0) {
			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
					     : : "r" (pstate));
			return;
		}
		stuck -= 1;
		if (stuck == 0)
			break;
	} while (result & 0x1);
	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
			     : : "r" (pstate));
	if (stuck == 0) {
		printk("CPU[%d]: mondo stuckage result[%016llx]\n",
		       smp_processor_id(), result);
	} else {
		udelay(2);
		goto again;
	}
}

static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	u64 *mondo, data0, data1, data2;
	u16 *cpu_list;
	u64 pstate;
	int i;

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
	cpu_list = __va(tb->cpu_list_pa);
	mondo = __va(tb->cpu_mondo_block_pa);
	data0 = mondo[0];
	data1 = mondo[1];
	data2 = mondo[2];
	for (i = 0; i < cnt; i++)
		spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
}

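/* Cheetah (and friends) can take the full 64 bytes of mondo data in a
 * single interrupt packet and can have several dispatches in flight, so
 * the whole target list is sent at once and NACKed CPUs are retried.
 */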
static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	int nack_busy_id, is_jbus, need_more;
	u64 *mondo, pstate, ver, busy_mask;
	u16 *cpu_list;

	cpu_list = __va(tb->cpu_list_pa);
	mondo = __va(tb->cpu_mondo_block_pa);

	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
	is_jbus = ((ver >> 32) == __JALAPENO_ID ||
		   (ver >> 32) == __SERRANO_ID);

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

retry:
	need_more = 0;
	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
			     : : "r" (pstate), "i" (PSTATE_IE));

	__asm__ __volatile__("stxa %0, [%3] %6\n\t"
			     "stxa %1, [%4] %6\n\t"
			     "stxa %2, [%5] %6\n\t"
			     "membar #Sync\n\t"
			     :
			     : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
			       "r" (0x40), "r" (0x50), "r" (0x60),
			       "i" (ASI_INTR_W));

	nack_busy_id = 0;
	busy_mask = 0;
	{
		int i;

		for (i = 0; i < cnt; i++) {
			u64 target, nr;

			nr = cpu_list[i];
			if (nr == 0xffff)
				continue;

			target = (nr << 14) | 0x70;
			if (is_jbus) {
				busy_mask |= (0x1UL << (nr * 2));
			} else {
				target |= (nack_busy_id << 24);
				busy_mask |= (0x1UL <<
					      (nack_busy_id * 2));
			}
			__asm__ __volatile__(
				"stxa %%g0, [%0] %1\n\t"
				"membar #Sync\n\t"
				:
				: "r" (target), "i" (ASI_INTR_W));
			nack_busy_id++;
			if (nack_busy_id == 32) {
				need_more = 1;
				break;
			}
		}
	}

	{
		u64 dispatch_stat, nack_mask;
		long stuck;

		stuck = 100000 * nack_busy_id;
		nack_mask = busy_mask << 1;
		do {
			__asm__ __volatile__("ldxa [%%g0] %1, %0"
					     : "=r" (dispatch_stat)
					     : "i" (ASI_INTR_DISPATCH_STAT));
			if (!(dispatch_stat & (busy_mask | nack_mask))) {
				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
						     : : "r" (pstate));
				if (unlikely(need_more)) {
					int i, this_cnt = 0;
					for (i = 0; i < cnt; i++) {
						if (cpu_list[i] == 0xffff)
							continue;
						cpu_list[i] = 0xffff;
						this_cnt++;
						if (this_cnt == 32)
							break;
					}
					goto retry;
				}
				return;
			}
			if (!--stuck)
				break;
		} while (dispatch_stat & busy_mask);

		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
				     : : "r" (pstate));

		if (dispatch_stat & busy_mask) {
			printk("CPU[%d]: mondo stuckage result[%016llx]\n",
			       smp_processor_id(), dispatch_stat);
		} else {
			int i, this_busy_nack = 0;

			udelay(2 * nack_busy_id);

			for (i = 0; i < cnt; i++) {
				u64 check_mask, nr;

				nr = cpu_list[i];
				if (nr == 0xffff)
					continue;

				if (is_jbus)
					check_mask = (0x2UL << (2*nr));
				else
					check_mask = (0x2UL <<
						      this_busy_nack);
				if ((dispatch_stat & check_mask) == 0)
					cpu_list[i] = 0xffff;
				this_busy_nack += 2;
				if (this_busy_nack == 64)
					break;
			}

			goto retry;
		}
	}
}

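/* Multi-cpu list version: the sun4v hypervisor takes the whole CPU list in
 * one call, so retry as long as forward progress is being made and mark
 * CPUs the hypervisor reports as being in an error state.
 */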
static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	int retries, this_cpu, prev_sent, i, saw_cpu_error;
	unsigned long status;
	u16 *cpu_list;

	this_cpu = smp_processor_id();

	cpu_list = __va(tb->cpu_list_pa);

	saw_cpu_error = 0;
	retries = 0;
	prev_sent = 0;
	do {
		int forward_progress, n_sent;

		status = sun4v_cpu_mondo_send(cnt,
					      tb->cpu_list_pa,
					      tb->cpu_mondo_block_pa);

		if (likely(status == HV_EOK))
			break;

		n_sent = 0;
		for (i = 0; i < cnt; i++) {
			if (likely(cpu_list[i] == 0xffff))
				n_sent++;
		}

		forward_progress = 0;
		if (n_sent > prev_sent)
			forward_progress = 1;

		prev_sent = n_sent;

		if (unlikely(status == HV_ECPUERROR)) {
			for (i = 0; i < cnt; i++) {
				long err;
				u16 cpu;

				cpu = cpu_list[i];
				if (cpu == 0xffff)
					continue;

				err = sun4v_cpu_state(cpu);
				if (err == HV_CPU_STATE_ERROR) {
					saw_cpu_error = (cpu + 1);
					cpu_list[i] = 0xffff;
				}
			}
		} else if (unlikely(status != HV_EWOULDBLOCK))
			goto fatal_mondo_error;

		if (unlikely(!forward_progress)) {
			if (unlikely(++retries > 10000))
				goto fatal_mondo_timeout;

			udelay(2 * cnt);
		}
	} while (1);

	if (unlikely(saw_cpu_error))
		goto fatal_mondo_cpu_error;

	return;

fatal_mondo_cpu_error:
	printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
	       "(including %d) were in error state\n",
	       this_cpu, saw_cpu_error - 1);
	return;

fatal_mondo_timeout:
	printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
	       " progress after %d retries.\n",
	       this_cpu, retries);
	goto dump_cpu_list_and_out;

fatal_mondo_error:
	printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
	       this_cpu, status);
	printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
	       "mondo_block_pa(%lx)\n",
	       this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);

dump_cpu_list_and_out:
	printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
	for (i = 0; i < cnt; i++)
		printk("%u ", cpu_list[i]);
	printk("]\n");
}

static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);

static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
{
	struct trap_per_cpu *tb;
	int this_cpu, i, cnt;
	unsigned long flags;
	u16 *cpu_list;
	u64 *mondo;

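	/* Interrupts must stay fully disabled here: an xcall sent from
	 * interrupt context would otherwise clobber the per-cpu mondo
	 * block and cpu list being built below.
	 */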
	local_irq_save(flags);

	this_cpu = smp_processor_id();
	tb = &trap_block[this_cpu];

	mondo = __va(tb->cpu_mondo_block_pa);
	mondo[0] = data0;
	mondo[1] = data1;
	mondo[2] = data2;
	wmb();

	cpu_list = __va(tb->cpu_list_pa);

	cnt = 0;
	for_each_cpu(i, mask) {
		if (i == this_cpu || !cpu_online(i))
			continue;
		cpu_list[cnt++] = i;
	}

	if (cnt)
		xcall_deliver_impl(tb, cnt);

	local_irq_restore(flags);
}

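/* Send a cross call to every CPU in the mask except the current one;
 * data0 carries the handler address in the low 32 bits and the MMU
 * context in the high 32 bits.
 */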
static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
{
	u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));

	xcall_deliver(data0, data1, data2, mask);
}

static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
{
	smp_cross_call_masked(func, ctx, data1, data2, cpu_online_mask);
}

extern unsigned long xcall_sync_tick;

static void smp_start_sync_tick_client(int cpu)
{
	xcall_deliver((u64) &xcall_sync_tick, 0, 0,
		      cpumask_of(cpu));
}

extern unsigned long xcall_call_function;

void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	xcall_deliver((u64) &xcall_call_function, 0, 0, mask);
}

extern unsigned long xcall_call_function_single;

void arch_send_call_function_single_ipi(int cpu)
{
	xcall_deliver((u64) &xcall_call_function_single, 0, 0,
		      cpumask_of(cpu));
}

void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);
	generic_smp_call_function_interrupt();
}

void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);
	generic_smp_call_function_single_interrupt();
}

static void tsb_sync(void *info)
{
	struct trap_per_cpu *tp = &trap_block[raw_smp_processor_id()];
	struct mm_struct *mm = info;

	if (tp->pgd_paddr == __pa(mm->pgd))
		tsb_context_switch(mm);
}

void smp_tsb_sync(struct mm_struct *mm)
{
	smp_call_function_many(mm_cpumask(mm), tsb_sync, mm, 1);
}

extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_page;
extern unsigned long xcall_flush_tlb_kernel_range;
extern unsigned long xcall_fetch_glob_regs;
extern unsigned long xcall_fetch_glob_pmu;
extern unsigned long xcall_fetch_glob_pmu_n4;
extern unsigned long xcall_receive_signal;
extern unsigned long xcall_new_mmu_context_version;
#ifdef CONFIG_KGDB
extern unsigned long xcall_kgdb_capture;
#endif

#ifdef DCACHE_ALIASING_POSSIBLE
extern unsigned long xcall_flush_dcache_page_cheetah;
#endif
extern unsigned long xcall_flush_dcache_page_spitfire;

#ifdef CONFIG_DEBUG_DCFLUSH
extern atomic_t dcpage_flushes;
extern atomic_t dcpage_flushes_xcall;
#endif

static inline void __local_flush_dcache_page(struct page *page)
{
#ifdef DCACHE_ALIASING_POSSIBLE
	__flush_dcache_page(page_address(page),
			    ((tlb_type == spitfire) &&
			     page_mapping(page) != NULL));
#else
	if (page_mapping(page) != NULL &&
	    tlb_type == spitfire)
		__flush_icache_page(__pa(page_address(page)));
#endif
}

void smp_flush_dcache_page_impl(struct page *page, int cpu)
{
	int this_cpu;

	if (tlb_type == hypervisor)
		return;

#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif

	this_cpu = get_cpu();

	if (cpu == this_cpu) {
		__local_flush_dcache_page(page);
	} else if (cpu_online(cpu)) {
		void *pg_addr = page_address(page);
		u64 data0 = 0;

		if (tlb_type == spitfire) {
			data0 = ((u64)&xcall_flush_dcache_page_spitfire);
			if (page_mapping(page) != NULL)
				data0 |= ((u64)1 << 32);
		} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
			data0 = ((u64)&xcall_flush_dcache_page_cheetah);
#endif
		}
		if (data0) {
			xcall_deliver(data0, __pa(pg_addr),
				      (u64) pg_addr, cpumask_of(cpu));
#ifdef CONFIG_DEBUG_DCFLUSH
			atomic_inc(&dcpage_flushes_xcall);
#endif
		}
	}

	put_cpu();
}

void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
{
	void *pg_addr;
	u64 data0;

	if (tlb_type == hypervisor)
		return;

	preempt_disable();

#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif
	data0 = 0;
	pg_addr = page_address(page);
	if (tlb_type == spitfire) {
		data0 = ((u64)&xcall_flush_dcache_page_spitfire);
		if (page_mapping(page) != NULL)
			data0 |= ((u64)1 << 32);
	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
		data0 = ((u64)&xcall_flush_dcache_page_cheetah);
#endif
	}
	if (data0) {
		xcall_deliver(data0, __pa(pg_addr),
			      (u64) pg_addr, cpu_online_mask);
#ifdef CONFIG_DEBUG_DCFLUSH
		atomic_inc(&dcpage_flushes_xcall);
#endif
	}
	__local_flush_dcache_page(page);

	preempt_enable();
}

void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
{
	struct mm_struct *mm;
	unsigned long flags;

	clear_softint(1 << irq);

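	/* See if the current mm needs a new context because the version of
	 * the one it was using is now stale.
	 */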
	mm = current->active_mm;
	if (unlikely(!mm || (mm == &init_mm)))
		return;

	spin_lock_irqsave(&mm->context.lock, flags);

	if (unlikely(!CTX_VALID(mm->context)))
		get_new_mmu_context(mm);

	spin_unlock_irqrestore(&mm->context.lock, flags);

	load_secondary_context(mm);
	__flush_tlb_mm(CTX_HWBITS(mm->context),
		       SECONDARY_CONTEXT);
}

void smp_new_mmu_context_version(void)
{
	smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
}

#ifdef CONFIG_KGDB
void kgdb_roundup_cpus(unsigned long flags)
{
	smp_cross_call(&xcall_kgdb_capture, 0, 0, 0);
}
#endif

void smp_fetch_global_regs(void)
{
	smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0);
}

void smp_fetch_global_pmu(void)
{
	if (tlb_type == hypervisor &&
	    sun4v_chip_type >= SUN4V_CHIP_NIAGARA4)
		smp_cross_call(&xcall_fetch_glob_pmu_n4, 0, 0, 0);
	else
		smp_cross_call(&xcall_fetch_glob_pmu, 0, 0, 0);
}

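/* TLB flush cross calls.
 *
 * The local TLB is always flushed directly; remote CPUs that may hold
 * translations for the mm (tracked via mm_cpumask()) are flushed with a
 * cross call.  When this CPU is the only user of the mm, the cpumask is
 * trimmed to just ourselves and the cross call is skipped entirely.
 */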
void smp_flush_tlb_mm(struct mm_struct *mm)
{
	u32 ctx = CTX_HWBITS(mm->context);
	int cpu = get_cpu();

	if (atomic_read(&mm->mm_users) == 1) {
		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
		goto local_flush_and_out;
	}

	smp_cross_call_masked(&xcall_flush_tlb_mm,
			      ctx, 0, 0,
			      mm_cpumask(mm));

local_flush_and_out:
	__flush_tlb_mm(ctx, SECONDARY_CONTEXT);

	put_cpu();
}

struct tlb_pending_info {
	unsigned long ctx;
	unsigned long nr;
	unsigned long *vaddrs;
};

static void tlb_pending_func(void *info)
{
	struct tlb_pending_info *t = info;

	__flush_tlb_pending(t->ctx, t->nr, t->vaddrs);
}

void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
{
	u32 ctx = CTX_HWBITS(mm->context);
	struct tlb_pending_info info;
	int cpu = get_cpu();

	info.ctx = ctx;
	info.nr = nr;
	info.vaddrs = vaddrs;

	if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
	else
		smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
				       &info, 1);

	__flush_tlb_pending(ctx, nr, vaddrs);

	put_cpu();
}

void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr)
{
	unsigned long context = CTX_HWBITS(mm->context);
	int cpu = get_cpu();

	if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
	else
		smp_cross_call_masked(&xcall_flush_tlb_page,
				      context, vaddr, 0,
				      mm_cpumask(mm));
	__flush_tlb_page(context, vaddr);

	put_cpu();
}

void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	start &= PAGE_MASK;
	end = PAGE_ALIGN(end);
	if (start != end) {
		smp_cross_call(&xcall_flush_tlb_kernel_range,
			       0, start, end);

		__flush_tlb_kernel_range(start, end);
	}
}

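/* CPU capture: corral every other online CPU into a quiescent spin loop
 * (see smp_penguin_jailcell) while the caller performs an operation that
 * must not race with them, then release the CPUs again.
 */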
extern unsigned long xcall_capture;

static atomic_t smp_capture_depth = ATOMIC_INIT(0);
static atomic_t smp_capture_registry = ATOMIC_INIT(0);
static unsigned long penguins_are_doing_time;

void smp_capture(void)
{
	int result = atomic_add_ret(1, &smp_capture_depth);

	if (result == 1) {
		int ncpus = num_online_cpus();

#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Sending penguins to jail...",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 1;
		atomic_inc(&smp_capture_registry);
		smp_cross_call(&xcall_capture, 0, 0, 0);
		while (atomic_read(&smp_capture_registry) != ncpus)
			rmb();
#ifdef CAPTURE_DEBUG
		printk("done\n");
#endif
	}
}

void smp_release(void)
{
	if (atomic_dec_and_test(&smp_capture_depth)) {
#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Giving pardon to "
		       "imprisoned penguins\n",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 0;
		membar_safe("#StoreLoad");
		atomic_dec(&smp_capture_registry);
	}
}

extern void prom_world(int);

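/* Handler for the capture IPI: flush register windows, switch to the
 * PROM's view of the world, and spin until smp_release() clears
 * penguins_are_doing_time.
 */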
void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);

	preempt_disable();

	__asm__ __volatile__("flushw");
	prom_world(1);
	atomic_inc(&smp_capture_registry);
	membar_safe("#StoreLoad");
	while (penguins_are_doing_time)
		rmb();
	atomic_dec(&smp_capture_registry);
	prom_world(0);

	preempt_enable();
}

int setup_profiling_timer(unsigned int multiplier)
{
	return -EINVAL;
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
}

void smp_prepare_boot_cpu(void)
{
}

void __init smp_setup_processor_id(void)
{
	if (tlb_type == spitfire)
		xcall_deliver_impl = spitfire_xcall_deliver;
	else if (tlb_type == cheetah || tlb_type == cheetah_plus)
		xcall_deliver_impl = cheetah_xcall_deliver;
	else
		xcall_deliver_impl = hypervisor_xcall_deliver;
}

void smp_fill_in_sib_core_maps(void)
{
	unsigned int i;

	for_each_present_cpu(i) {
		unsigned int j;

		cpumask_clear(&cpu_core_map[i]);
		if (cpu_data(i).core_id == 0) {
			cpumask_set_cpu(i, &cpu_core_map[i]);
			continue;
		}

		for_each_present_cpu(j) {
			if (cpu_data(i).core_id ==
			    cpu_data(j).core_id)
				cpumask_set_cpu(j, &cpu_core_map[i]);
		}
	}

	for_each_present_cpu(i) {
		unsigned int j;

		cpumask_clear(&per_cpu(cpu_sibling_map, i));
		if (cpu_data(i).proc_id == -1) {
			cpumask_set_cpu(i, &per_cpu(cpu_sibling_map, i));
			continue;
		}

		for_each_present_cpu(j) {
			if (cpu_data(i).proc_id ==
			    cpu_data(j).proc_id)
				cpumask_set_cpu(j, &per_cpu(cpu_sibling_map, i));
		}
	}
}

int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int ret = smp_boot_one_cpu(cpu, tidle);

	if (!ret) {
		cpumask_set_cpu(cpu, &smp_commenced_mask);
		while (!cpu_online(cpu))
			mb();
		if (!cpu_online(cpu)) {
			ret = -ENODEV;
		} else {
			if (tlb_type != hypervisor)
				smp_synchronize_one_tick(cpu);
		}
	}
	return ret;
}

#ifdef CONFIG_HOTPLUG_CPU
void cpu_play_dead(void)
{
	int cpu = smp_processor_id();
	unsigned long pstate;

	idle_task_exit();

	if (tlb_type == hypervisor) {
		struct trap_per_cpu *tb = &trap_block[cpu];

		sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO,
				tb->cpu_mondo_pa, 0);
		sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO,
				tb->dev_mondo_pa, 0);
		sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR,
				tb->resum_mondo_pa, 0);
		sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR,
				tb->nonresum_mondo_pa, 0);
	}

	cpumask_clear_cpu(cpu, &smp_commenced_mask);
	membar_safe("#Sync");

	local_irq_disable();

	__asm__ __volatile__(
		"rdpr %%pstate, %0\n\t"
		"wrpr %0, %1, %%pstate"
		: "=r" (pstate)
		: "i" (PSTATE_IE));

	while (1)
		barrier();
}

int __cpu_disable(void)
{
	int cpu = smp_processor_id();
	cpuinfo_sparc *c;
	int i;

	for_each_cpu(i, &cpu_core_map[cpu])
		cpumask_clear_cpu(cpu, &cpu_core_map[i]);
	cpumask_clear(&cpu_core_map[cpu]);

	for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu))
		cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i));
	cpumask_clear(&per_cpu(cpu_sibling_map, cpu));

	c = &cpu_data(cpu);

	c->core_id = 0;
	c->proc_id = -1;

	smp_wmb();

	fixup_irqs();

	local_irq_enable();
	mdelay(1);
	local_irq_disable();

	set_cpu_online(cpu, false);

	cpu_map_rebuild();

	return 0;
}

void __cpu_die(unsigned int cpu)
{
	int i;

	for (i = 0; i < 100; i++) {
		smp_rmb();
		if (!cpumask_test_cpu(cpu, &smp_commenced_mask))
			break;
		msleep(100);
	}
	if (cpumask_test_cpu(cpu, &smp_commenced_mask)) {
		printk(KERN_ERR "CPU %u didn't die...\n", cpu);
	} else {
#if defined(CONFIG_SUN_LDOMS)
		unsigned long hv_err;
		int limit = 100;

		do {
			hv_err = sun4v_cpu_stop(cpu);
			if (hv_err == HV_EOK) {
				set_cpu_present(cpu, false);
				break;
			}
		} while (--limit > 0);
		if (limit <= 0) {
			printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n",
			       hv_err);
		}
#endif
	}
}
#endif

void __init smp_cpus_done(unsigned int max_cpus)
{
	pcr_arch_init();
}

void smp_send_reschedule(int cpu)
{
	xcall_deliver((u64) &xcall_receive_signal, 0, 0,
		      cpumask_of(cpu));
}

void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);
	scheduler_ipi();
}

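/* Intentionally a no-op on sparc64: other CPUs are not forcibly stopped
 * through this hook.
 */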
void smp_send_stop(void)
{
}

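/* NUMA-friendly bootmem allocator for the per-cpu areas: prefer memory on
 * the node owning @cpu and fall back to any node when node-local memory
 * is not available.
 */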
static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
					size_t align)
{
	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
#ifdef CONFIG_NEED_MULTIPLE_NODES
	int node = cpu_to_node(cpu);
	void *ptr;

	if (!node_online(node) || !NODE_DATA(node)) {
		ptr = __alloc_bootmem(size, align, goal);
		pr_info("cpu %d has no node %d or node-local memory\n",
			cpu, node);
		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
			 cpu, size, __pa(ptr));
	} else {
		ptr = __alloc_bootmem_node(NODE_DATA(node),
					   size, align, goal);
		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
			 "%016lx\n", cpu, size, node, __pa(ptr));
	}
	return ptr;
#else
	return __alloc_bootmem(size, align, goal);
#endif
}

static void __init pcpu_free_bootmem(void *ptr, size_t size)
{
	free_bootmem(__pa(ptr), size);
}

static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
	if (cpu_to_node(from) == cpu_to_node(to))
		return LOCAL_DISTANCE;
	else
		return REMOTE_DISTANCE;
}

static void __init pcpu_populate_pte(unsigned long addr)
{
	pgd_t *pgd = pgd_offset_k(addr);
	pud_t *pud;
	pmd_t *pmd;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud)) {
		pmd_t *new;

		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
		pud_populate(&init_mm, pud, new);
	}

	pmd = pmd_offset(pud, addr);
	if (!pmd_present(*pmd)) {
		pte_t *new;

		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
		pmd_populate_kernel(&init_mm, pmd, new);
	}
}

void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc = -EINVAL;

	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
					    PERCPU_DYNAMIC_RESERVE, 4 << 20,
					    pcpu_cpu_distance,
					    pcpu_alloc_bootmem,
					    pcpu_free_bootmem);
		if (rc)
			pr_warning("PERCPU: %s allocator failed (%d), "
				   "falling back to page size\n",
				   pcpu_fc_names[pcpu_chosen_fc], rc);
	}
	if (rc < 0)
		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
					   pcpu_alloc_bootmem,
					   pcpu_free_bootmem,
					   pcpu_populate_pte);
	if (rc < 0)
		panic("cannot initialize percpu area (err=%d)", rc);

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];

	__local_per_cpu_offset = __per_cpu_offset(smp_processor_id());

	of_fill_in_cpu_data();
	if (tlb_type == hypervisor)
		mdesc_fill_in_cpu_data(cpu_all_mask);
}