/* smp.c: Sparc64 SMP support. */
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/profile.h>
#include <linux/bootmem.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <linux/slab.h>

#include <asm/head.h>
#include <asm/ptrace.h>
#include <linux/atomic.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cpudata.h>
#include <asm/hvtramp.h>
#include <asm/io.h>
#include <asm/timer.h>

#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/uaccess.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
#include <asm/sections.h>
#include <asm/prom.h>
#include <asm/mdesc.h>
#include <asm/ldc.h>
#include <asm/hypervisor.h>
#include <asm/pcr.h>

#include "cpumap.h"

int sparc64_multi_core __read_mostly;

DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
	{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };

EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_SYMBOL(cpu_core_map);

static cpumask_t smp_commenced_mask;

void smp_info(struct seq_file *m)
{
	int i;

	seq_printf(m, "State:\n");
	for_each_online_cpu(i)
		seq_printf(m, "CPU%d:\t\tonline\n", i);
}

void smp_bogo(struct seq_file *m)
{
	int i;

	for_each_online_cpu(i)
		seq_printf(m,
			   "Cpu%dClkTck\t: %016lx\n",
			   i, cpu_data(i).clock_tick);
}

extern void setup_sparc64_timer(void);

static volatile unsigned long callin_flag = 0;

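/* Entry point for a freshly booted secondary CPU (called from the boot
 * trampoline).  It sets up the per-cpu offset and local timer, registers
 * the kernel TSBs on sun4v, attaches to init_mm, reports in through
 * callin_flag, waits for the boot CPU to add it to smp_commenced_mask,
 * marks itself online and finally enters the idle loop.
 */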
void smp_callin(void)
{
	int cpuid = hard_smp_processor_id();

	__local_per_cpu_offset = __per_cpu_offset(cpuid);

	if (tlb_type == hypervisor)
		sun4v_ktsb_register();

	__flush_tlb_all();

	setup_sparc64_timer();

	if (cheetah_pcache_forced_on)
		cheetah_enable_pcache();

	callin_flag = 1;
	__asm__ __volatile__("membar #Sync\n\t"
			     "flush %%g6" : : : "memory");

	/* Clear this or we will die instantly when we
	 * schedule back to this idler...
	 */
	current_thread_info()->new_child = 0;

	/* Attach to the address space of init_task. */
	atomic_inc(&init_mm.mm_count);
	current->active_mm = &init_mm;

	/* Inform the notifiers about the new cpu. */
	notify_cpu_starting(cpuid);

	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
		rmb();

	set_cpu_online(cpuid, true);

	/* The idle thread is expected to have preempt disabled. */
	preempt_disable();

	local_irq_enable();

	cpu_startup_entry(CPUHP_ONLINE);
}

void cpu_panic(void)
{
	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
	panic("SMP bolixed\n");
}

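/* Tick/stick synchronization between the boot CPU (master) and a newly
 * started CPU (slave).  The slave samples its own tick counter around a
 * handshake with the master through the cache-line separated go[] words,
 * keeps the round with the smallest turnaround time, and nudges its tick
 * register toward the master's value over NUM_ROUNDS iterations.
 */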
#define MASTER	0
#define SLAVE	(SMP_CACHE_BYTES/sizeof(unsigned long))

#define NUM_ROUNDS	64
#define NUM_ITERS	5

static DEFINE_SPINLOCK(itc_sync_lock);
static unsigned long go[SLAVE + 1];

#define DEBUG_TICK_SYNC	0

static inline long get_delta (long *rt, long *master)
{
	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
	unsigned long tcenter, t0, t1, tm;
	unsigned long i;

	for (i = 0; i < NUM_ITERS; i++) {
		t0 = tick_ops->get_tick();
		go[MASTER] = 1;
		membar_safe("#StoreLoad");
		while (!(tm = go[SLAVE]))
			rmb();
		go[SLAVE] = 0;
		wmb();
		t1 = tick_ops->get_tick();

		if (t1 - t0 < best_t1 - best_t0)
			best_t0 = t0, best_t1 = t1, best_tm = tm;
	}

	*rt = best_t1 - best_t0;
	*master = best_tm - best_t0;

	/* Average best_t0 and best_t1 without overflow. */
	tcenter = (best_t0/2 + best_t1/2);
	if (best_t0 % 2 + best_t1 % 2 == 2)
		tcenter++;
	return tcenter - best_tm;
}

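/* Slave side of the tick synchronization: runs on the freshly booted CPU.
 * It repeatedly measures its offset from the master via get_delta() and
 * applies corrections through tick_ops->add_tick().
 */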
void smp_synchronize_tick_client(void)
{
	long i, delta, adj, adjust_latency = 0, done = 0;
	unsigned long flags, rt, master_time_stamp;
#if DEBUG_TICK_SYNC
	struct {
		long rt;
		long master;
		long diff;
		long lat;
	} t[NUM_ROUNDS];
#endif

	go[MASTER] = 1;

	while (go[MASTER])
		rmb();

	local_irq_save(flags);
	{
		for (i = 0; i < NUM_ROUNDS; i++) {
			delta = get_delta(&rt, &master_time_stamp);
			if (delta == 0)
				done = 1;

			if (!done) {
				if (i > 0) {
					adjust_latency += -delta;
					adj = -delta + adjust_latency/4;
				} else
					adj = -delta;

				tick_ops->add_tick(adj);
			}
#if DEBUG_TICK_SYNC
			t[i].rt = rt;
			t[i].master = master_time_stamp;
			t[i].diff = delta;
			t[i].lat = adjust_latency/4;
#endif
		}
	}
	local_irq_restore(flags);

#if DEBUG_TICK_SYNC
	for (i = 0; i < NUM_ROUNDS; i++)
		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif

	printk(KERN_INFO "CPU %d: synchronized TICK with master CPU "
	       "(last diff %ld cycles, maxerr %lu cycles)\n",
	       smp_processor_id(), delta, rt);
}

static void smp_start_sync_tick_client(int cpu);

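/* Master side of the tick synchronization: kick the target CPU with a
 * sync-tick cross call, then feed it tick samples, one per handshake
 * round, while holding itc_sync_lock.
 */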
static void smp_synchronize_one_tick(int cpu)
{
	unsigned long flags, i;

	go[MASTER] = 0;

	smp_start_sync_tick_client(cpu);

	while (!go[MASTER])
		rmb();

	go[MASTER] = 0;
	membar_safe("#StoreLoad");

	spin_lock_irqsave(&itc_sync_lock, flags);
	{
		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
			while (!go[MASTER])
				rmb();
			go[MASTER] = 0;
			wmb();
			go[SLAVE] = tick_ops->get_tick();
			membar_safe("#StoreLoad");
		}
	}
	spin_unlock_irqrestore(&itc_sync_lock, flags);
}

#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)

static unsigned long kimage_addr_to_ra(void *p)
{
	unsigned long val = (unsigned long) p;

	return kern_base + (val - KERNBASE);
}

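/* Start a CPU in an LDOM guest: build an hvtramp_descr describing the
 * kernel image mappings, the MMU fault status area and the initial thread
 * pointer, then hand it to the hypervisor via sun4v_cpu_start() so the new
 * CPU enters through hv_cpu_startup.
 */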
static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg,
				void **descrp)
{
	extern unsigned long sparc64_ttable_tl0;
	extern unsigned long kern_locked_tte_data;
	struct hvtramp_descr *hdesc;
	unsigned long trampoline_ra;
	struct trap_per_cpu *tb;
	u64 tte_vaddr, tte_data;
	unsigned long hv_err;
	int i;

	hdesc = kzalloc(sizeof(*hdesc) +
			(sizeof(struct hvtramp_mapping) *
			 num_kernel_image_mappings - 1),
			GFP_KERNEL);
	if (!hdesc) {
		printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
		       "hvtramp_descr.\n");
		return;
	}
	*descrp = hdesc;

	hdesc->cpu = cpu;
	hdesc->num_mappings = num_kernel_image_mappings;

	tb = &trap_block[cpu];

	hdesc->fault_info_va = (unsigned long) &tb->fault_info;
	hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);

	hdesc->thread_reg = thread_reg;

	tte_vaddr = (unsigned long) KERNBASE;
	tte_data = kern_locked_tte_data;

	for (i = 0; i < hdesc->num_mappings; i++) {
		hdesc->maps[i].vaddr = tte_vaddr;
		hdesc->maps[i].tte = tte_data;
		tte_vaddr += 0x400000;
		tte_data += 0x400000;
	}

	trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);

	hv_err = sun4v_cpu_start(cpu, trampoline_ra,
				 kimage_addr_to_ra(&sparc64_ttable_tl0),
				 __pa(hdesc));
	if (hv_err)
		printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
		       "gives error %lu\n", hv_err);
}
#endif

extern unsigned long sparc64_cpu_startup;

static struct thread_info *cpu_new_thread = NULL;

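/* Boot one secondary CPU: point cpu_new_thread at the idle task's thread
 * info, kick the CPU through the hypervisor, LDOM or OBP path as
 * appropriate, then poll callin_flag for up to about five seconds waiting
 * for the new CPU to check in from smp_callin().
 */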
static int smp_boot_one_cpu(unsigned int cpu, struct task_struct *idle)
{
	unsigned long entry =
		(unsigned long)(&sparc64_cpu_startup);
	unsigned long cookie =
		(unsigned long)(&cpu_new_thread);
	void *descr = NULL;
	int timeout, ret;

	callin_flag = 0;
	cpu_new_thread = task_thread_info(idle);

	if (tlb_type == hypervisor) {
#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
		if (ldom_domaining_enabled)
			ldom_startcpu_cpuid(cpu,
					    (unsigned long) cpu_new_thread,
					    &descr);
		else
#endif
			prom_startcpu_cpuid(cpu, entry, cookie);
	} else {
		struct device_node *dp = of_find_node_by_cpuid(cpu);

		prom_startcpu(dp->phandle, entry, cookie);
	}

	for (timeout = 0; timeout < 50000; timeout++) {
		if (callin_flag)
			break;
		udelay(100);
	}

	if (callin_flag) {
		ret = 0;
	} else {
		printk("Processor %d is stuck.\n", cpu);
		ret = -ENODEV;
	}
	cpu_new_thread = NULL;

	kfree(descr);

	return ret;
}

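/* Deliver an interrupt vector ("mondo") to a single CPU on spitfire.
 * Starfire boxes first remap the logical cpu number to the physical
 * UPA id.  The dispatch is retried until the busy bit in the interrupt
 * dispatch status register clears or we give up.
 */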
static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
{
	u64 result, target;
	int stuck, tmp;

	if (this_is_starfire) {
		cpu = (((cpu & 0x3c) << 1) |
			((cpu & 0x40) >> 4) |
			(cpu & 0x3));
	}

	target = (cpu << 14) | 0x70;
again:
	tmp = 0x40;
	__asm__ __volatile__(
	"wrpr %1, %2, %%pstate\n\t"
	"stxa %4, [%0] %3\n\t"
	"stxa %5, [%0+%8] %3\n\t"
	"add %0, %8, %0\n\t"
	"stxa %6, [%0+%8] %3\n\t"
	"membar #Sync\n\t"
	"stxa %%g0, [%7] %3\n\t"
	"membar #Sync\n\t"
	"mov 0x20, %%g1\n\t"
	"ldxa [%%g1] 0x7f, %%g0\n\t"
	"membar #Sync"
	: "=r" (tmp)
	: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
	  "r" (data0), "r" (data1), "r" (data2), "r" (target),
	  "r" (0x10), "0" (tmp)
	: "g1");

	stuck = 100000;
	do {
		__asm__ __volatile__("ldxa [%%g0] %1, %0"
			: "=r" (result)
			: "i" (ASI_INTR_DISPATCH_STAT));
		if (result == 0) {
			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
					     : : "r" (pstate));
			return;
		}
		stuck -= 1;
		if (stuck == 0)
			break;
	} while (result & 0x1);
	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
			     : : "r" (pstate));
	if (stuck == 0) {
		printk("CPU[%d]: mondo stuckage result[%016llx]\n",
		       smp_processor_id(), result);
	} else {
		udelay(2);
		goto again;
	}
}

static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	u64 *mondo, data0, data1, data2;
	u16 *cpu_list;
	u64 pstate;
	int i;

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
	cpu_list = __va(tb->cpu_list_pa);
	mondo = __va(tb->cpu_mondo_block_pa);
	data0 = mondo[0];
	data1 = mondo[1];
	data2 = mondo[2];
	for (i = 0; i < cnt; i++)
		spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
}

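/* Cheetah (and derivatives) can dispatch a mondo to up to 32 targets at a
 * time.  After firing the dispatch we watch the interrupt dispatch status
 * register: BUSY bits must clear, NACKed targets stay in cpu_list[] and are
 * retried, and delivered entries are marked with 0xffff.  JBUS parts
 * (Jalapeno/Serrano) index the status bits by cpu number rather than by
 * dispatch slot.
 */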
static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	int nack_busy_id, is_jbus, need_more;
	u64 *mondo, pstate, ver, busy_mask;
	u16 *cpu_list;

	cpu_list = __va(tb->cpu_list_pa);
	mondo = __va(tb->cpu_mondo_block_pa);

	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
	is_jbus = ((ver >> 32) == __JALAPENO_ID ||
		   (ver >> 32) == __SERRANO_ID);

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

retry:
	need_more = 0;
	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
			     : : "r" (pstate), "i" (PSTATE_IE));

	__asm__ __volatile__("stxa %0, [%3] %6\n\t"
			     "stxa %1, [%4] %6\n\t"
			     "stxa %2, [%5] %6\n\t"
			     "membar #Sync\n\t"
			     :
			     : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
			       "r" (0x40), "r" (0x50), "r" (0x60),
			       "i" (ASI_INTR_W));

	nack_busy_id = 0;
	busy_mask = 0;
	{
		int i;

		for (i = 0; i < cnt; i++) {
			u64 target, nr;

			nr = cpu_list[i];
			if (nr == 0xffff)
				continue;

			target = (nr << 14) | 0x70;
			if (is_jbus) {
				busy_mask |= (0x1UL << (nr * 2));
			} else {
				target |= (nack_busy_id << 24);
				busy_mask |= (0x1UL <<
					      (nack_busy_id * 2));
			}
			__asm__ __volatile__(
				"stxa %%g0, [%0] %1\n\t"
				"membar #Sync\n\t"
				:
				: "r" (target), "i" (ASI_INTR_W));
			nack_busy_id++;
			if (nack_busy_id == 32) {
				need_more = 1;
				break;
			}
		}
	}

	{
		u64 dispatch_stat, nack_mask;
		long stuck;

		stuck = 100000 * nack_busy_id;
		nack_mask = busy_mask << 1;
		do {
			__asm__ __volatile__("ldxa [%%g0] %1, %0"
					     : "=r" (dispatch_stat)
					     : "i" (ASI_INTR_DISPATCH_STAT));
			if (!(dispatch_stat & (busy_mask | nack_mask))) {
				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
						     : : "r" (pstate));
				if (unlikely(need_more)) {
					int i, this_cnt = 0;
					for (i = 0; i < cnt; i++) {
						if (cpu_list[i] == 0xffff)
							continue;
						cpu_list[i] = 0xffff;
						this_cnt++;
						if (this_cnt == 32)
							break;
					}
					goto retry;
				}
				return;
			}
			if (!--stuck)
				break;
		} while (dispatch_stat & busy_mask);

		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
				     : : "r" (pstate));

		if (dispatch_stat & busy_mask) {
			printk("CPU[%d]: mondo stuckage result[%016llx]\n",
			       smp_processor_id(), dispatch_stat);
		} else {
			int i, this_busy_nack = 0;

			udelay(2 * nack_busy_id);

			for (i = 0; i < cnt; i++) {
				u64 check_mask, nr;

				nr = cpu_list[i];
				if (nr == 0xffff)
					continue;

				if (is_jbus)
					check_mask = (0x2UL << (2*nr));
				else
					check_mask = (0x2UL <<
						      this_busy_nack);
				if ((dispatch_stat & check_mask) == 0)
					cpu_list[i] = 0xffff;
				this_busy_nack += 2;
				if (this_busy_nack == 64)
					break;
			}

			goto retry;
		}
	}
}

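/* sun4v multi-cpu list version: the hypervisor walks the cpu list for us.
 * Entries that were successfully delivered come back as 0xffff; on
 * HV_EWOULDBLOCK we simply resend the remainder, on HV_ECPUERROR we drop
 * the CPUs the hypervisor reports as being in the error state, and we give
 * up if no forward progress is made after many retries.
 */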
static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	int retries, this_cpu, prev_sent, i, saw_cpu_error;
	unsigned long status;
	u16 *cpu_list;

	this_cpu = smp_processor_id();

	cpu_list = __va(tb->cpu_list_pa);

	saw_cpu_error = 0;
	retries = 0;
	prev_sent = 0;
	do {
		int forward_progress, n_sent;

		status = sun4v_cpu_mondo_send(cnt,
					      tb->cpu_list_pa,
					      tb->cpu_mondo_block_pa);

		/* HV_EOK means all cpus received the xcall, we're done. */
		if (likely(status == HV_EOK))
			break;

		/* See if we made any forward progress.  The hypervisor
		 * indicates successful sends by setting cpu list entries
		 * to the value 0xffff.
		 */
		n_sent = 0;
		for (i = 0; i < cnt; i++) {
			if (likely(cpu_list[i] == 0xffff))
				n_sent++;
		}

		forward_progress = 0;
		if (n_sent > prev_sent)
			forward_progress = 1;

		prev_sent = n_sent;

		/* On HV_ECPUERROR, one or more of the cpus in the list are
		 * in error state.  Use the cpu_state() hypervisor call to
		 * find out which ones.
		 */
		if (unlikely(status == HV_ECPUERROR)) {
			for (i = 0; i < cnt; i++) {
				long err;
				u16 cpu;

				cpu = cpu_list[i];
				if (cpu == 0xffff)
					continue;

				err = sun4v_cpu_state(cpu);
				if (err == HV_CPU_STATE_ERROR) {
					saw_cpu_error = (cpu + 1);
					cpu_list[i] = 0xffff;
				}
			}
		} else if (unlikely(status != HV_EWOULDBLOCK))
			goto fatal_mondo_error;

		/* We don't rewrite the cpu list; the hypervisor skips the
		 * 0xffff entries on the resend.  Only advance the timeout
		 * state if we made no forward progress this iteration.
		 */
		if (unlikely(!forward_progress)) {
			if (unlikely(++retries > 10000))
				goto fatal_mondo_timeout;

			/* Delay a little bit to let other cpus catch up on
			 * their cpu mondo queue work.
			 */
			udelay(2 * cnt);
		}
	} while (1);

	if (unlikely(saw_cpu_error))
		goto fatal_mondo_cpu_error;

	return;

fatal_mondo_cpu_error:
	printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
	       "(including %d) were in error state\n",
	       this_cpu, saw_cpu_error - 1);
	return;

fatal_mondo_timeout:
	printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
	       " progress after %d retries.\n",
	       this_cpu, retries);
	goto dump_cpu_list_and_out;

fatal_mondo_error:
	printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
	       this_cpu, status);
	printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
	       "mondo_block_pa(%lx)\n",
	       this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);

dump_cpu_list_and_out:
	printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
	for (i = 0; i < cnt; i++)
		printk("%u ", cpu_list[i]);
	printk("]\n");
}

static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);

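/* Common cross-call entry point: stash the three mondo data words and the
 * target cpu list into this cpu's trap_block (the low level dispatchers
 * access them via their physical addresses), then invoke the per-cpu-type
 * delivery implementation selected at boot.
 */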
static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
{
	struct trap_per_cpu *tb;
	int this_cpu, i, cnt;
	unsigned long flags;
	u16 *cpu_list;
	u64 *mondo;

	/* We have to do this whole thing with interrupts fully disabled.
	 * Otherwise if we send an xcall from interrupt context it will
	 * corrupt both our mondo block and cpu list state.
	 *
	 * One consequence of this is that we cannot use timeout mechanisms
	 * that depend upon interrupts being delivered locally.  So, for
	 * example, we cannot sample jiffies and expect it to advance.
	 *
	 * Fortunately, udelay() uses %stick/%tick so we can use that.
	 */
	local_irq_save(flags);

	this_cpu = smp_processor_id();
	tb = &trap_block[this_cpu];

	mondo = __va(tb->cpu_mondo_block_pa);
	mondo[0] = data0;
	mondo[1] = data1;
	mondo[2] = data2;
	wmb();

	cpu_list = __va(tb->cpu_list_pa);

	/* Setup the initial cpu list. */
	cnt = 0;
	for_each_cpu(i, mask) {
		if (i == this_cpu || !cpu_online(i))
			continue;
		cpu_list[cnt++] = i;
	}

	if (cnt)
		xcall_deliver_impl(tb, cnt);

	local_irq_restore(flags);
}

/* Send cross call to all processors mentioned in MASK_P except self. */
static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
{
	u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));

	xcall_deliver(data0, data1, data2, mask);
}

/* Send cross call to all processors except self. */
static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
{
	smp_cross_call_masked(func, ctx, data1, data2, cpu_online_mask);
}

extern unsigned long xcall_sync_tick;

static void smp_start_sync_tick_client(int cpu)
{
	xcall_deliver((u64) &xcall_sync_tick, 0, 0,
		      cpumask_of(cpu));
}

extern unsigned long xcall_call_function;

void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	xcall_deliver((u64) &xcall_call_function, 0, 0, mask);
}

extern unsigned long xcall_call_function_single;

void arch_send_call_function_single_ipi(int cpu)
{
	xcall_deliver((u64) &xcall_call_function_single, 0, 0,
		      cpumask_of(cpu));
}

void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);
	generic_smp_call_function_interrupt();
}

void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);
	generic_smp_call_function_single_interrupt();
}

static void tsb_sync(void *info)
{
	struct trap_per_cpu *tp = &trap_block[raw_smp_processor_id()];
	struct mm_struct *mm = info;

	/* It is not valid to test "current->active_mm == mm" here.
	 *
	 * The value of "current" is not changed atomically with
	 * switch_mm().  But that's OK, we just need to check the
	 * current cpu's trap block PGD physical address.
	 */
	if (tp->pgd_paddr == __pa(mm->pgd))
		tsb_context_switch(mm);
}

void smp_tsb_sync(struct mm_struct *mm)
{
	smp_call_function_many(mm_cpumask(mm), tsb_sync, mm, 1);
}

extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_page;
extern unsigned long xcall_flush_tlb_kernel_range;
extern unsigned long xcall_fetch_glob_regs;
extern unsigned long xcall_fetch_glob_pmu;
extern unsigned long xcall_fetch_glob_pmu_n4;
extern unsigned long xcall_receive_signal;
extern unsigned long xcall_new_mmu_context_version;
#ifdef CONFIG_KGDB
extern unsigned long xcall_kgdb_capture;
#endif

#ifdef DCACHE_ALIASING_POSSIBLE
extern unsigned long xcall_flush_dcache_page_cheetah;
#endif
extern unsigned long xcall_flush_dcache_page_spitfire;

#ifdef CONFIG_DEBUG_DCFLUSH
extern atomic_t dcpage_flushes;
extern atomic_t dcpage_flushes_xcall;
#endif

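/* D-cache flush helpers.  On spitfire and cheetah class machines the
 * virtually indexed D-cache may alias, so flushing a page that is (or was)
 * mapped into user space can require a cross call to the cpu that last
 * touched it.  sun4v (hypervisor) machines never need this, hence the
 * early returns below.
 */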
static inline void __local_flush_dcache_page(struct page *page)
{
#ifdef DCACHE_ALIASING_POSSIBLE
	__flush_dcache_page(page_address(page),
			    ((tlb_type == spitfire) &&
			     page_mapping(page) != NULL));
#else
	if (page_mapping(page) != NULL &&
	    tlb_type == spitfire)
		__flush_icache_page(__pa(page_address(page)));
#endif
}

void smp_flush_dcache_page_impl(struct page *page, int cpu)
{
	int this_cpu;

	if (tlb_type == hypervisor)
		return;

#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif

	this_cpu = get_cpu();

	if (cpu == this_cpu) {
		__local_flush_dcache_page(page);
	} else if (cpu_online(cpu)) {
		void *pg_addr = page_address(page);
		u64 data0 = 0;

		if (tlb_type == spitfire) {
			data0 = ((u64)&xcall_flush_dcache_page_spitfire);
			if (page_mapping(page) != NULL)
				data0 |= ((u64)1 << 32);
		} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
			data0 = ((u64)&xcall_flush_dcache_page_cheetah);
#endif
		}
		if (data0) {
			xcall_deliver(data0, __pa(pg_addr),
				      (u64) pg_addr, cpumask_of(cpu));
#ifdef CONFIG_DEBUG_DCFLUSH
			atomic_inc(&dcpage_flushes_xcall);
#endif
		}
	}

	put_cpu();
}

void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
{
	void *pg_addr;
	u64 data0;

	if (tlb_type == hypervisor)
		return;

	preempt_disable();

#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif
	data0 = 0;
	pg_addr = page_address(page);
	if (tlb_type == spitfire) {
		data0 = ((u64)&xcall_flush_dcache_page_spitfire);
		if (page_mapping(page) != NULL)
			data0 |= ((u64)1 << 32);
	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
		data0 = ((u64)&xcall_flush_dcache_page_cheetah);
#endif
	}
	if (data0) {
		xcall_deliver(data0, __pa(pg_addr),
			      (u64) pg_addr, cpu_online_mask);
#ifdef CONFIG_DEBUG_DCFLUSH
		atomic_inc(&dcpage_flushes_xcall);
#endif
	}
	__local_flush_dcache_page(page);

	preempt_enable();
}

void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
{
	struct mm_struct *mm;
	unsigned long flags;

	clear_softint(1 << irq);

	/* See if we need to allocate a new TLB context because
	 * the version of the one we are using is now out of date.
	 */
	mm = current->active_mm;
	if (unlikely(!mm || (mm == &init_mm)))
		return;

	spin_lock_irqsave(&mm->context.lock, flags);

	if (unlikely(!CTX_VALID(mm->context)))
		get_new_mmu_context(mm);

	spin_unlock_irqrestore(&mm->context.lock, flags);

	load_secondary_context(mm);
	__flush_tlb_mm(CTX_HWBITS(mm->context),
		       SECONDARY_CONTEXT);
}

void smp_new_mmu_context_version(void)
{
	smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
}

#ifdef CONFIG_KGDB
void kgdb_roundup_cpus(unsigned long flags)
{
	smp_cross_call(&xcall_kgdb_capture, 0, 0, 0);
}
#endif

void smp_fetch_global_regs(void)
{
	smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0);
}

void smp_fetch_global_pmu(void)
{
	if (tlb_type == hypervisor &&
	    sun4v_chip_type >= SUN4V_CHIP_NIAGARA4)
		smp_cross_call(&xcall_fetch_glob_pmu_n4, 0, 0, 0);
	else
		smp_cross_call(&xcall_fetch_glob_pmu, 0, 0, 0);
}

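/* SMP TLB coherency scheme: mm_cpumask(mm) tracks which cpus an address
 * space may be active on and is the heuristic used to decide whether a
 * cross call is needed at all.  When the current task is the only user of
 * the mm we shrink the mask to just this cpu; otherwise the flush is
 * broadcast to every cpu in the mask.  The local TLB flush is always
 * performed, after any cross calls have been sent.
 */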
void smp_flush_tlb_mm(struct mm_struct *mm)
{
	u32 ctx = CTX_HWBITS(mm->context);
	int cpu = get_cpu();

	if (atomic_read(&mm->mm_users) == 1) {
		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
		goto local_flush_and_out;
	}

	smp_cross_call_masked(&xcall_flush_tlb_mm,
			      ctx, 0, 0,
			      mm_cpumask(mm));

local_flush_and_out:
	__flush_tlb_mm(ctx, SECONDARY_CONTEXT);

	put_cpu();
}

struct tlb_pending_info {
	unsigned long ctx;
	unsigned long nr;
	unsigned long *vaddrs;
};

static void tlb_pending_func(void *info)
{
	struct tlb_pending_info *t = info;

	__flush_tlb_pending(t->ctx, t->nr, t->vaddrs);
}

void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
{
	u32 ctx = CTX_HWBITS(mm->context);
	struct tlb_pending_info info;
	int cpu = get_cpu();

	info.ctx = ctx;
	info.nr = nr;
	info.vaddrs = vaddrs;

	if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
	else
		smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
				       &info, 1);

	__flush_tlb_pending(ctx, nr, vaddrs);

	put_cpu();
}

void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr)
{
	unsigned long context = CTX_HWBITS(mm->context);
	int cpu = get_cpu();

	if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
	else
		smp_cross_call_masked(&xcall_flush_tlb_page,
				      context, vaddr, 0,
				      mm_cpumask(mm));
	__flush_tlb_page(context, vaddr);

	put_cpu();
}

void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	start &= PAGE_MASK;
	end = PAGE_ALIGN(end);
	if (start != end) {
		smp_cross_call(&xcall_flush_tlb_kernel_range,
			       0, start, end);

		__flush_tlb_kernel_range(start, end);
	}
}

extern unsigned long xcall_capture;

static atomic_t smp_capture_depth = ATOMIC_INIT(0);
static atomic_t smp_capture_registry = ATOMIC_INIT(0);
static unsigned long penguins_are_doing_time;

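/* CPU capture: smp_capture() herds every other online cpu into
 * smp_penguin_jailcell() via a cross call, where they spin in the PROM
 * world until smp_release() lets them go.  Captures may nest;
 * smp_capture_depth counts the nesting and smp_capture_registry counts
 * how many cpus have checked into the jail.
 */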
void smp_capture(void)
{
	int result = atomic_add_ret(1, &smp_capture_depth);

	if (result == 1) {
		int ncpus = num_online_cpus();

#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Sending penguins to jail...",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 1;
		atomic_inc(&smp_capture_registry);
		smp_cross_call(&xcall_capture, 0, 0, 0);
		while (atomic_read(&smp_capture_registry) != ncpus)
			rmb();
#ifdef CAPTURE_DEBUG
		printk("done\n");
#endif
	}
}

void smp_release(void)
{
	if (atomic_dec_and_test(&smp_capture_depth)) {
#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Giving pardon to "
		       "imprisoned penguins\n",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 0;
		membar_safe("#StoreLoad");
		atomic_dec(&smp_capture_registry);
	}
}

extern void prom_world(int);

void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);

	preempt_disable();

	__asm__ __volatile__("flushw");
	prom_world(1);
	atomic_inc(&smp_capture_registry);
	membar_safe("#StoreLoad");
	while (penguins_are_doing_time)
		rmb();
	atomic_dec(&smp_capture_registry);
	prom_world(0);

	preempt_enable();
}

/* /proc/profile writes can call this, don't __init it please. */
int setup_profiling_timer(unsigned int multiplier)
{
	return -EINVAL;
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
}

void smp_prepare_boot_cpu(void)
{
}

void __init smp_setup_processor_id(void)
{
	if (tlb_type == spitfire)
		xcall_deliver_impl = spitfire_xcall_deliver;
	else if (tlb_type == cheetah || tlb_type == cheetah_plus)
		xcall_deliver_impl = cheetah_xcall_deliver;
	else
		xcall_deliver_impl = hypervisor_xcall_deliver;
}

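/* Build the core and thread sibling maps from the core_id and proc_id
 * values that were filled into cpu_data by the OBP/MD cpu probing code.
 * CPUs with no topology information (core_id 0 / proc_id -1) are placed
 * alone in their own group.
 */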
void smp_fill_in_sib_core_maps(void)
{
	unsigned int i;

	for_each_present_cpu(i) {
		unsigned int j;

		cpumask_clear(&cpu_core_map[i]);
		if (cpu_data(i).core_id == 0) {
			cpumask_set_cpu(i, &cpu_core_map[i]);
			continue;
		}

		for_each_present_cpu(j) {
			if (cpu_data(i).core_id ==
			    cpu_data(j).core_id)
				cpumask_set_cpu(j, &cpu_core_map[i]);
		}
	}

	for_each_present_cpu(i) {
		unsigned int j;

		cpumask_clear(&per_cpu(cpu_sibling_map, i));
		if (cpu_data(i).proc_id == -1) {
			cpumask_set_cpu(i, &per_cpu(cpu_sibling_map, i));
			continue;
		}

		for_each_present_cpu(j) {
			if (cpu_data(i).proc_id ==
			    cpu_data(j).proc_id)
				cpumask_set_cpu(j, &per_cpu(cpu_sibling_map, i));
		}
	}
}

int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int ret = smp_boot_one_cpu(cpu, tidle);

	if (!ret) {
		cpumask_set_cpu(cpu, &smp_commenced_mask);
		while (!cpu_online(cpu))
			mb();
		if (!cpu_online(cpu)) {
			ret = -ENODEV;
		} else {
			/* On SUN4V, writes to %tick and %stick are
			 * not allowed.
			 */
			if (tlb_type != hypervisor)
				smp_synchronize_one_tick(cpu);
		}
	}
	return ret;
}

#ifdef CONFIG_HOTPLUG_CPU
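/* cpu_play_dead() runs on the cpu that is going offline: it tears down its
 * sun4v mondo queues, removes itself from smp_commenced_mask and then
 * spins with interrupts disabled until it is stopped.
 */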
void cpu_play_dead(void)
{
	int cpu = smp_processor_id();
	unsigned long pstate;

	idle_task_exit();

	if (tlb_type == hypervisor) {
		struct trap_per_cpu *tb = &trap_block[cpu];

		sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO,
				tb->cpu_mondo_pa, 0);
		sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO,
				tb->dev_mondo_pa, 0);
		sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR,
				tb->resum_mondo_pa, 0);
		sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR,
				tb->nonresum_mondo_pa, 0);
	}

	cpumask_clear_cpu(cpu, &smp_commenced_mask);
	membar_safe("#Sync");

	local_irq_disable();

	__asm__ __volatile__(
		"rdpr %%pstate, %0\n\t"
		"wrpr %0, %1, %%pstate"
		: "=r" (pstate)
		: "i" (PSTATE_IE));

	while (1)
		barrier();
}

int __cpu_disable(void)
{
	int cpu = smp_processor_id();
	cpuinfo_sparc *c;
	int i;

	for_each_cpu(i, &cpu_core_map[cpu])
		cpumask_clear_cpu(cpu, &cpu_core_map[i]);
	cpumask_clear(&cpu_core_map[cpu]);

	for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu))
		cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i));
	cpumask_clear(&per_cpu(cpu_sibling_map, cpu));

	c = &cpu_data(cpu);

	c->core_id = 0;
	c->proc_id = -1;

	smp_wmb();

	/* Make sure no interrupts point to this cpu. */
	fixup_irqs();

	local_irq_enable();
	mdelay(1);
	local_irq_disable();

	set_cpu_online(cpu, false);

	cpu_map_rebuild();

	return 0;
}

void __cpu_die(unsigned int cpu)
{
	int i;

	for (i = 0; i < 100; i++) {
		smp_rmb();
		if (!cpumask_test_cpu(cpu, &smp_commenced_mask))
			break;
		msleep(100);
	}
	if (cpumask_test_cpu(cpu, &smp_commenced_mask)) {
		printk(KERN_ERR "CPU %u didn't die...\n", cpu);
	} else {
#if defined(CONFIG_SUN_LDOMS)
		unsigned long hv_err;
		int limit = 100;

		do {
			hv_err = sun4v_cpu_stop(cpu);
			if (hv_err == HV_EOK) {
				set_cpu_present(cpu, false);
				break;
			}
		} while (--limit > 0);
		if (limit <= 0) {
			printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n",
			       hv_err);
		}
#endif
	}
}
#endif

void __init smp_cpus_done(unsigned int max_cpus)
{
	pcr_arch_init();
}

void smp_send_reschedule(int cpu)
{
	if (cpu == smp_processor_id()) {
		WARN_ON_ONCE(preemptible());
		set_softint(1 << PIL_SMP_RECEIVE_SIGNAL);
	} else {
		xcall_deliver((u64) &xcall_receive_signal,
			      0, 0, cpumask_of(cpu));
	}
}

void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);
	scheduler_ipi();
}

/* This is a nop because we capture all other cpus
 * anyways when making the PROM active.
 */
void smp_send_stop(void)
{
}

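/* NUMA friendly bootmem helpers for the per-cpu first chunk: allocate from
 * the cpu's own node when that node is online and has memory, otherwise
 * fall back to a plain bootmem allocation.
 */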
static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
					size_t align)
{
	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
#ifdef CONFIG_NEED_MULTIPLE_NODES
	int node = cpu_to_node(cpu);
	void *ptr;

	if (!node_online(node) || !NODE_DATA(node)) {
		ptr = __alloc_bootmem(size, align, goal);
		pr_info("cpu %d has no node %d or node-local memory\n",
			cpu, node);
		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
			 cpu, size, __pa(ptr));
	} else {
		ptr = __alloc_bootmem_node(NODE_DATA(node),
					   size, align, goal);
		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
			 "%016lx\n", cpu, size, node, __pa(ptr));
	}
	return ptr;
#else
	return __alloc_bootmem(size, align, goal);
#endif
}

static void __init pcpu_free_bootmem(void *ptr, size_t size)
{
	free_bootmem(__pa(ptr), size);
}

static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
	if (cpu_to_node(from) == cpu_to_node(to))
		return LOCAL_DISTANCE;
	else
		return REMOTE_DISTANCE;
}

static void __init pcpu_populate_pte(unsigned long addr)
{
	pgd_t *pgd = pgd_offset_k(addr);
	pud_t *pud;
	pmd_t *pmd;

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud)) {
		pmd_t *new;

		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
		pud_populate(&init_mm, pud, new);
	}

	pmd = pmd_offset(pud, addr);
	if (!pmd_present(*pmd)) {
		pte_t *new;

		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
		pmd_populate_kernel(&init_mm, pmd, new);
	}
}

void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc = -EINVAL;

	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
					    PERCPU_DYNAMIC_RESERVE, 4 << 20,
					    pcpu_cpu_distance,
					    pcpu_alloc_bootmem,
					    pcpu_free_bootmem);
		if (rc)
			pr_warning("PERCPU: %s allocator failed (%d), "
				   "falling back to page size\n",
				   pcpu_fc_names[pcpu_chosen_fc], rc);
	}
	if (rc < 0)
		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
					   pcpu_alloc_bootmem,
					   pcpu_free_bootmem,
					   pcpu_populate_pte);
	if (rc < 0)
		panic("cannot initialize percpu area (err=%d)", rc);

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];

	__local_per_cpu_offset = __per_cpu_offset(smp_processor_id());

	of_fill_in_cpu_data();
	if (tlb_type == hypervisor)
		mdesc_fill_in_cpu_data(cpu_all_mask);
}