/* smp.c: Sparc64 SMP support.
 *
 * Copyright (C) 1997, 2007, 2008 David S. Miller (davem@davemloft.net)
 */
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/threads.h>
#include <linux/smp.h>
#include <linux/interrupt.h>
#include <linux/kernel_stat.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/fs.h>
#include <linux/seq_file.h>
#include <linux/cache.h>
#include <linux/jiffies.h>
#include <linux/profile.h>
#include <linux/bootmem.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/kgdb.h>

#include <asm/head.h>
#include <asm/ptrace.h>
#include <linux/atomic.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/cpudata.h>
#include <asm/hvtramp.h>
#include <asm/io.h>
#include <asm/timer.h>
#include <asm/setup.h>

#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/uaccess.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
#include <asm/sections.h>
#include <asm/prom.h>
#include <asm/mdesc.h>
#include <asm/ldc.h>
#include <asm/hypervisor.h>
#include <asm/pcr.h>

#include "cpumap.h"
#include "kernel.h"

DEFINE_PER_CPU(cpumask_t, cpu_sibling_map) = CPU_MASK_NONE;
cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
	{ [0 ... NR_CPUS-1] = CPU_MASK_NONE };

cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = {
	[0 ... NR_CPUS-1] = CPU_MASK_NONE };

cpumask_t cpu_core_sib_cache_map[NR_CPUS] __read_mostly = {
	[0 ... NR_CPUS - 1] = CPU_MASK_NONE };

EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_SYMBOL(cpu_core_map);
EXPORT_SYMBOL(cpu_core_sib_map);
EXPORT_SYMBOL(cpu_core_sib_cache_map);

static cpumask_t smp_commenced_mask;

void smp_info(struct seq_file *m)
{
	int i;

	seq_printf(m, "State:\n");
	for_each_online_cpu(i)
		seq_printf(m, "CPU%d:\t\tonline\n", i);
}

void smp_bogo(struct seq_file *m)
{
	int i;

	for_each_online_cpu(i)
		seq_printf(m,
			   "Cpu%dClkTck\t: %016lx\n",
			   i, cpu_data(i).clock_tick);
}

extern void setup_sparc64_timer(void);

static volatile unsigned long callin_flag = 0;

void smp_callin(void)
{
	int cpuid = hard_smp_processor_id();

	__local_per_cpu_offset = __per_cpu_offset(cpuid);

	if (tlb_type == hypervisor)
		sun4v_ktsb_register();

	__flush_tlb_all();

	setup_sparc64_timer();

	if (cheetah_pcache_forced_on)
		cheetah_enable_pcache();

	callin_flag = 1;
	__asm__ __volatile__("membar #Sync\n\t"
			     "flush  %%g6" : : : "memory");

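	/* Clear this or we will die instantly when we
	 * schedule back to this idler...
	 */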
	current_thread_info()->new_child = 0;

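	/* Attach to the address space of init_task. */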
	atomic_inc(&init_mm.mm_count);
	current->active_mm = &init_mm;

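	/* inform the notifiers about the new cpu */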
	notify_cpu_starting(cpuid);

	while (!cpumask_test_cpu(cpuid, &smp_commenced_mask))
		rmb();

	set_cpu_online(cpuid, true);

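	/* idle thread is expected to have preempt disabled */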
	preempt_disable();

	local_irq_enable();

	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

void cpu_panic(void)
{
	printk("CPU[%d]: Returns from cpu_idle!\n", smp_processor_id());
	panic("SMP bolixed\n");
}

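/* This tick register synchronization scheme is taken entirely from
 * the ia64 port, see arch/ia64/kernel/smpboot.c for details and
 * explanation.
 *
 * The only change I've made is to rework it so that the master
 * initiates the synchronization instead of the slave. -DaveM
 */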
#define MASTER	0
#define SLAVE	(SMP_CACHE_BYTES/sizeof(unsigned long))

#define NUM_ROUNDS	64
#define NUM_ITERS	5

static DEFINE_RAW_SPINLOCK(itc_sync_lock);
static unsigned long go[SLAVE + 1];

#define DEBUG_TICK_SYNC	0

static inline long get_delta (long *rt, long *master)
{
	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
	unsigned long tcenter, t0, t1, tm;
	unsigned long i;

	for (i = 0; i < NUM_ITERS; i++) {
		t0 = tick_ops->get_tick();
		go[MASTER] = 1;
		membar_safe("#StoreLoad");
		while (!(tm = go[SLAVE]))
			rmb();
		go[SLAVE] = 0;
		wmb();
		t1 = tick_ops->get_tick();

		if (t1 - t0 < best_t1 - best_t0)
			best_t0 = t0, best_t1 = t1, best_tm = tm;
	}

	*rt = best_t1 - best_t0;
	*master = best_tm - best_t0;

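	/* average best_t0 and best_t1 without overflow: */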
	tcenter = (best_t0/2 + best_t1/2);
	if (best_t0 % 2 + best_t1 % 2 == 2)
		tcenter++;
	return tcenter - best_tm;
}

void smp_synchronize_tick_client(void)
{
	long i, delta, adj, adjust_latency = 0, done = 0;
	unsigned long flags, rt, master_time_stamp;
#if DEBUG_TICK_SYNC
	struct {
		long rt;
		long master;
		long diff;
		long lat;
	} t[NUM_ROUNDS];
#endif

	go[MASTER] = 1;

	while (go[MASTER])
		rmb();

	local_irq_save(flags);
	{
		for (i = 0; i < NUM_ROUNDS; i++) {
			delta = get_delta(&rt, &master_time_stamp);
			if (delta == 0)
				done = 1;

			if (!done) {
				if (i > 0) {
					adjust_latency += -delta;
					adj = -delta + adjust_latency/4;
				} else
					adj = -delta;

				tick_ops->add_tick(adj);
			}
#if DEBUG_TICK_SYNC
			t[i].rt = rt;
			t[i].master = master_time_stamp;
			t[i].diff = delta;
			t[i].lat = adjust_latency/4;
#endif
		}
	}
	local_irq_restore(flags);

#if DEBUG_TICK_SYNC
	for (i = 0; i < NUM_ROUNDS; i++)
		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
#endif

	printk(KERN_INFO "CPU %d: synchronized TICK with master CPU "
	       "(last diff %ld cycles, maxerr %lu cycles)\n",
	       smp_processor_id(), delta, rt);
}

static void smp_start_sync_tick_client(int cpu);

static void smp_synchronize_one_tick(int cpu)
{
	unsigned long flags, i;

	go[MASTER] = 0;

	smp_start_sync_tick_client(cpu);

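	/* wait for client to be ready */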
	while (!go[MASTER])
		rmb();

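	/* now let the client proceed into his loop */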
	go[MASTER] = 0;
	membar_safe("#StoreLoad");

	raw_spin_lock_irqsave(&itc_sync_lock, flags);
	{
		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; i++) {
			while (!go[MASTER])
				rmb();
			go[MASTER] = 0;
			wmb();
			go[SLAVE] = tick_ops->get_tick();
			membar_safe("#StoreLoad");
		}
	}
	raw_spin_unlock_irqrestore(&itc_sync_lock, flags);
}

#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg,
				void **descrp)
{
	extern unsigned long sparc64_ttable_tl0;
	extern unsigned long kern_locked_tte_data;
	struct hvtramp_descr *hdesc;
	unsigned long trampoline_ra;
	struct trap_per_cpu *tb;
	u64 tte_vaddr, tte_data;
	unsigned long hv_err;
	int i;

	hdesc = kzalloc(sizeof(*hdesc) +
			(sizeof(struct hvtramp_mapping) *
			 num_kernel_image_mappings - 1),
			GFP_KERNEL);
	if (!hdesc) {
		printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
		       "hvtramp_descr.\n");
		return;
	}
	*descrp = hdesc;

	hdesc->cpu = cpu;
	hdesc->num_mappings = num_kernel_image_mappings;

	tb = &trap_block[cpu];

	hdesc->fault_info_va = (unsigned long) &tb->fault_info;
	hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);

	hdesc->thread_reg = thread_reg;

	tte_vaddr = (unsigned long) KERNBASE;
	tte_data = kern_locked_tte_data;

	for (i = 0; i < hdesc->num_mappings; i++) {
		hdesc->maps[i].vaddr = tte_vaddr;
		hdesc->maps[i].tte   = tte_data;
		tte_vaddr += 0x400000;
		tte_data  += 0x400000;
	}

	trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);

	hv_err = sun4v_cpu_start(cpu, trampoline_ra,
				 kimage_addr_to_ra(&sparc64_ttable_tl0),
				 __pa(hdesc));
	if (hv_err)
		printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
		       "gives error %lu\n", hv_err);
}
#endif

extern unsigned long sparc64_cpu_startup;

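/* The OBP cpu startup callback truncates the 3rd arg cookie to
 * 32-bits (I think) so to be safe we have it read the pointer
 * contained here so we work on >4GB machines. -DaveM
 */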
static struct thread_info *cpu_new_thread = NULL;

static int smp_boot_one_cpu(unsigned int cpu, struct task_struct *idle)
{
	unsigned long entry =
		(unsigned long)(&sparc64_cpu_startup);
	unsigned long cookie =
		(unsigned long)(&cpu_new_thread);
	void *descr = NULL;
	int timeout, ret;

	callin_flag = 0;
	cpu_new_thread = task_thread_info(idle);

	if (tlb_type == hypervisor) {
#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
		if (ldom_domaining_enabled)
			ldom_startcpu_cpuid(cpu,
					    (unsigned long) cpu_new_thread,
					    &descr);
		else
#endif
			prom_startcpu_cpuid(cpu, entry, cookie);
	} else {
		struct device_node *dp = of_find_node_by_cpuid(cpu);

		prom_startcpu(dp->phandle, entry, cookie);
	}

	for (timeout = 0; timeout < 50000; timeout++) {
		if (callin_flag)
			break;
		udelay(100);
	}

	if (callin_flag) {
		ret = 0;
	} else {
		printk("Processor %d is stuck.\n", cpu);
		ret = -ENODEV;
	}
	cpu_new_thread = NULL;

	kfree(descr);

	return ret;
}

static void spitfire_xcall_helper(u64 data0, u64 data1, u64 data2, u64 pstate, unsigned long cpu)
{
	u64 result, target;
	int stuck, tmp;

	if (this_is_starfire) {
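		/* map to real upaid */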
		cpu = (((cpu & 0x3c) << 1) |
			((cpu & 0x40) >> 4) |
			(cpu & 0x3));
	}

	target = (cpu << 14) | 0x70;
again:
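	/* Ok, this is the very first write attempt or a write attempt
	 * we are retrying...
	 */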
	tmp = 0x40;
	__asm__ __volatile__(
	"wrpr	%1, %2, %%pstate\n\t"
	"stxa	%4, [%0] %3\n\t"
	"stxa	%5, [%0+%8] %3\n\t"
	"add	%0, %8, %0\n\t"
	"stxa	%6, [%0+%8] %3\n\t"
	"membar	#Sync\n\t"
	"stxa	%%g0, [%7] %3\n\t"
	"membar	#Sync\n\t"
	"mov	0x20, %%g1\n\t"
	"ldxa	[%%g1] 0x7f, %%g0\n\t"
	"membar	#Sync"
	: "=r" (tmp)
	: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W),
	  "r" (data0), "r" (data1), "r" (data2), "r" (target),
	  "r" (0x10), "0" (tmp)
	: "g1");

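	/* NOTE: PSTATE_IE is still clear. */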
	stuck = 100000;
	do {
		__asm__ __volatile__("ldxa [%%g0] %1, %0"
			: "=r" (result)
			: "i" (ASI_INTR_DISPATCH_STAT));
		if (result == 0) {
			__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
					     : : "r" (pstate));
			return;
		}
		stuck -= 1;
		if (stuck == 0)
			break;
	} while (result & 0x1);
	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
			     : : "r" (pstate));
	if (stuck == 0) {
		printk("CPU[%d]: mondo stuckage result[%016llx]\n",
		       smp_processor_id(), result);
	} else {
		udelay(2);
		goto again;
	}
}

static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	u64 *mondo, data0, data1, data2;
	u16 *cpu_list;
	u64 pstate;
	int i;

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
	cpu_list = __va(tb->cpu_list_pa);
	mondo = __va(tb->cpu_mondo_block_pa);
	data0 = mondo[0];
	data1 = mondo[1];
	data2 = mondo[2];
	for (i = 0; i < cnt; i++)
		spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
}

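/* Cheetah now allows to send the whole 64-bytes of data in the interrupt
 * packet, but we have no use for that.  However we do take advantage of
 * the new pipelining feature (ie. dispatch to multiple cpus simultaneously).
 */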
static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	int nack_busy_id, is_jbus, need_more;
	u64 *mondo, pstate, ver, busy_mask;
	u16 *cpu_list;

	cpu_list = __va(tb->cpu_list_pa);
	mondo = __va(tb->cpu_mondo_block_pa);

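	/* Unfortunately, someone at Sun had the brilliant idea to make the
	 * busy/nack fields hard-coded by ITID number for this Ultra-III
	 * derivative processor.
	 */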
	__asm__ ("rdpr %%ver, %0" : "=r" (ver));
	is_jbus = ((ver >> 32) == __JALAPENO_ID ||
		   (ver >> 32) == __SERRANO_ID);

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

retry:
	need_more = 0;
	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
			     : : "r" (pstate), "i" (PSTATE_IE));

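	/* Setup the dispatch data registers. */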
	__asm__ __volatile__("stxa	%0, [%3] %6\n\t"
			     "stxa	%1, [%4] %6\n\t"
			     "stxa	%2, [%5] %6\n\t"
			     "membar	#Sync\n\t"
			     :
			     : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
			       "r" (0x40), "r" (0x50), "r" (0x60),
			       "i" (ASI_INTR_W));

	nack_busy_id = 0;
	busy_mask = 0;
	{
		int i;

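		/* Setup the dispatch target registers. */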
		for (i = 0; i < cnt; i++) {
			u64 target, nr;

			nr = cpu_list[i];
			if (nr == 0xffff)
				continue;

			target = (nr << 14) | 0x70;
			if (is_jbus) {
				busy_mask |= (0x1UL << (nr * 2));
			} else {
				target |= (nack_busy_id << 24);
				busy_mask |= (0x1UL <<
					      (nack_busy_id * 2));
			}
			__asm__ __volatile__(
				"stxa	%%g0, [%0] %1\n\t"
				"membar	#Sync\n\t"
				:
				: "r" (target), "i" (ASI_INTR_W));
			nack_busy_id++;
			if (nack_busy_id == 32) {
				need_more = 1;
				break;
			}
		}
	}

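	/* Now, poll for completion. */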
	{
		u64 dispatch_stat, nack_mask;
		long stuck;

		stuck = 100000 * nack_busy_id;
		nack_mask = busy_mask << 1;
		do {
			__asm__ __volatile__("ldxa	[%%g0] %1, %0"
					     : "=r" (dispatch_stat)
					     : "i" (ASI_INTR_DISPATCH_STAT));
			if (!(dispatch_stat & (busy_mask | nack_mask))) {
				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
						     : : "r" (pstate));
				if (unlikely(need_more)) {
					int i, this_cnt = 0;
					for (i = 0; i < cnt; i++) {
						if (cpu_list[i] == 0xffff)
							continue;
						cpu_list[i] = 0xffff;
						this_cnt++;
						if (this_cnt == 32)
							break;
					}
					goto retry;
				}
				return;
			}
			if (!--stuck)
				break;
		} while (dispatch_stat & busy_mask);

		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
				     : : "r" (pstate));

		if (dispatch_stat & busy_mask) {
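			/* Busy bits will not clear, continue instead
			 * of freezing up on this cpu.
			 */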
			printk("CPU[%d]: mondo stuckage result[%016llx]\n",
			       smp_processor_id(), dispatch_stat);
		} else {
			int i, this_busy_nack = 0;

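			/* Delay some random time with interrupts enabled
			 * to prevent deadlock.
			 */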
			udelay(2 * nack_busy_id);

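			/* Clear out the mask bits for cpus which did not
			 * NACK us.
			 */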
			for (i = 0; i < cnt; i++) {
				u64 check_mask, nr;

				nr = cpu_list[i];
				if (nr == 0xffff)
					continue;

				if (is_jbus)
					check_mask = (0x2UL << (2*nr));
				else
					check_mask = (0x2UL <<
						      this_busy_nack);
				if ((dispatch_stat & check_mask) == 0)
					cpu_list[i] = 0xffff;
				this_busy_nack += 2;
				if (this_busy_nack == 64)
					break;
			}

			goto retry;
		}
	}
}

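/* Multi-cpu list version.  */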
static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	int retries, this_cpu, prev_sent, i, saw_cpu_error;
	unsigned long status;
	u16 *cpu_list;

	this_cpu = smp_processor_id();

	cpu_list = __va(tb->cpu_list_pa);

	saw_cpu_error = 0;
	retries = 0;
	prev_sent = 0;
	do {
		int forward_progress, n_sent;

		status = sun4v_cpu_mondo_send(cnt,
					      tb->cpu_list_pa,
					      tb->cpu_mondo_block_pa);

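		/* HV_EOK means all cpus received the xcall, we're done.  */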
		if (likely(status == HV_EOK))
			break;

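		/* First, see if we made any forward progress.
		 *
		 * The hypervisor indicates successful sends by setting
		 * cpu list entries to the value 0xffff.
		 */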
		n_sent = 0;
		for (i = 0; i < cnt; i++) {
			if (likely(cpu_list[i] == 0xffff))
				n_sent++;
		}

		forward_progress = 0;
		if (n_sent > prev_sent)
			forward_progress = 1;

		prev_sent = n_sent;

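		/* If we get a HV_ECPUERROR, then one or more of the cpus
		 * in the list are in error state.  Use the cpu_state()
		 * hypervisor call to find out which cpus are in error state.
		 */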
		if (unlikely(status == HV_ECPUERROR)) {
			for (i = 0; i < cnt; i++) {
				long err;
				u16 cpu;

				cpu = cpu_list[i];
				if (cpu == 0xffff)
					continue;

				err = sun4v_cpu_state(cpu);
				if (err == HV_CPU_STATE_ERROR) {
					saw_cpu_error = (cpu + 1);
					cpu_list[i] = 0xffff;
				}
			}
		} else if (unlikely(status != HV_EWOULDBLOCK))
			goto fatal_mondo_error;

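		/* Don't bother rewriting the CPU list, just leave the
		 * 0xffff and non-0xffff entries in there and the
		 * hypervisor will do the right thing.
		 *
		 * Only advance timeout state if we didn't make any
		 * forward progress.
		 */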
		if (unlikely(!forward_progress)) {
			if (unlikely(++retries > 10000))
				goto fatal_mondo_timeout;

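			/* Delay a little bit to let other cpus catch up
			 * on their cpu mondo queue work.
			 */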
			udelay(2 * cnt);
		}
	} while (1);

	if (unlikely(saw_cpu_error))
		goto fatal_mondo_cpu_error;

	return;

fatal_mondo_cpu_error:
	printk(KERN_CRIT "CPU[%d]: SUN4V mondo cpu error, some target cpus "
	       "(including %d) were in error state\n",
	       this_cpu, saw_cpu_error - 1);
	return;

fatal_mondo_timeout:
	printk(KERN_CRIT "CPU[%d]: SUN4V mondo timeout, no forward "
	       " progress after %d retries.\n",
	       this_cpu, retries);
	goto dump_cpu_list_and_out;

fatal_mondo_error:
	printk(KERN_CRIT "CPU[%d]: Unexpected SUN4V mondo error %lu\n",
	       this_cpu, status);
	printk(KERN_CRIT "CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) "
	       "mondo_block_pa(%lx)\n",
	       this_cpu, cnt, tb->cpu_list_pa, tb->cpu_mondo_block_pa);

dump_cpu_list_and_out:
	printk(KERN_CRIT "CPU[%d]: CPU list [ ", this_cpu);
	for (i = 0; i < cnt; i++)
		printk("%u ", cpu_list[i]);
	printk("]\n");
}

static void (*xcall_deliver_impl)(struct trap_per_cpu *, int);

static void xcall_deliver(u64 data0, u64 data1, u64 data2, const cpumask_t *mask)
{
	struct trap_per_cpu *tb;
	int this_cpu, i, cnt;
	unsigned long flags;
	u16 *cpu_list;
	u64 *mondo;

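	/* We have to do this whole thing with interrupts fully disabled.
	 * Otherwise if we send an xcall from interrupt context it will
	 * corrupt both our mondo block and cpu list state.
	 *
	 * One consequence of this is that we cannot use timeout mechanisms
	 * that depend upon interrupts being delivered locally.  So, for
	 * example, we cannot sample jiffies and expect it to advance.
	 *
	 * Fortunately, udelay() uses %stick/%tick so we can use that.
	 */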
	local_irq_save(flags);

	this_cpu = smp_processor_id();
	tb = &trap_block[this_cpu];

	mondo = __va(tb->cpu_mondo_block_pa);
	mondo[0] = data0;
	mondo[1] = data1;
	mondo[2] = data2;
	wmb();

	cpu_list = __va(tb->cpu_list_pa);

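	/* Setup the initial cpu list.  */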
	cnt = 0;
	for_each_cpu(i, mask) {
		if (i == this_cpu || !cpu_online(i))
			continue;
		cpu_list[cnt++] = i;
	}

	if (cnt)
		xcall_deliver_impl(tb, cnt);

	local_irq_restore(flags);
}

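/* Send cross call to all processors mentioned in MASK_P
 * except self.  Really, there are only two cases currently,
 * "cpu_online_mask" and "mm_cpumask(mm)".
 */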
static void smp_cross_call_masked(unsigned long *func, u32 ctx, u64 data1, u64 data2, const cpumask_t *mask)
{
	u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));

	xcall_deliver(data0, data1, data2, mask);
}

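/* Send cross call to all processors except self. */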
static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
{
	smp_cross_call_masked(func, ctx, data1, data2, cpu_online_mask);
}

extern unsigned long xcall_sync_tick;

static void smp_start_sync_tick_client(int cpu)
{
	xcall_deliver((u64) &xcall_sync_tick, 0, 0,
		      cpumask_of(cpu));
}

extern unsigned long xcall_call_function;

void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
	xcall_deliver((u64) &xcall_call_function, 0, 0, mask);
}

extern unsigned long xcall_call_function_single;

void arch_send_call_function_single_ipi(int cpu)
{
	xcall_deliver((u64) &xcall_call_function_single, 0, 0,
		      cpumask_of(cpu));
}

void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);
	irq_enter();
	generic_smp_call_function_interrupt();
	irq_exit();
}

void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);
	irq_enter();
	generic_smp_call_function_single_interrupt();
	irq_exit();
}

static void tsb_sync(void *info)
{
	struct trap_per_cpu *tp = &trap_block[raw_smp_processor_id()];
	struct mm_struct *mm = info;

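	/* It is not valid to test "current->active_mm == mm" here.
	 *
	 * The value of "current" is not changed atomically with
	 * switch_mm().  But that's OK, we just need to check the
	 * current cpu's trap block PGD physical address.
	 */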
	if (tp->pgd_paddr == __pa(mm->pgd))
		tsb_context_switch(mm);
}

void smp_tsb_sync(struct mm_struct *mm)
{
	smp_call_function_many(mm_cpumask(mm), tsb_sync, mm, 1);
}

extern unsigned long xcall_flush_tlb_mm;
extern unsigned long xcall_flush_tlb_page;
extern unsigned long xcall_flush_tlb_kernel_range;
extern unsigned long xcall_fetch_glob_regs;
extern unsigned long xcall_fetch_glob_pmu;
extern unsigned long xcall_fetch_glob_pmu_n4;
extern unsigned long xcall_receive_signal;
extern unsigned long xcall_new_mmu_context_version;
#ifdef CONFIG_KGDB
extern unsigned long xcall_kgdb_capture;
#endif

#ifdef DCACHE_ALIASING_POSSIBLE
extern unsigned long xcall_flush_dcache_page_cheetah;
#endif
extern unsigned long xcall_flush_dcache_page_spitfire;

static inline void __local_flush_dcache_page(struct page *page)
{
#ifdef DCACHE_ALIASING_POSSIBLE
	__flush_dcache_page(page_address(page),
			    ((tlb_type == spitfire) &&
			     page_mapping(page) != NULL));
#else
	if (page_mapping(page) != NULL &&
	    tlb_type == spitfire)
		__flush_icache_page(__pa(page_address(page)));
#endif
}

void smp_flush_dcache_page_impl(struct page *page, int cpu)
{
	int this_cpu;

	if (tlb_type == hypervisor)
		return;

#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif

	this_cpu = get_cpu();

	if (cpu == this_cpu) {
		__local_flush_dcache_page(page);
	} else if (cpu_online(cpu)) {
		void *pg_addr = page_address(page);
		u64 data0 = 0;

		if (tlb_type == spitfire) {
			data0 = ((u64)&xcall_flush_dcache_page_spitfire);
			if (page_mapping(page) != NULL)
				data0 |= ((u64)1 << 32);
		} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
			data0 = ((u64)&xcall_flush_dcache_page_cheetah);
#endif
		}
		if (data0) {
			xcall_deliver(data0, __pa(pg_addr),
				      (u64) pg_addr, cpumask_of(cpu));
#ifdef CONFIG_DEBUG_DCFLUSH
			atomic_inc(&dcpage_flushes_xcall);
#endif
		}
	}

	put_cpu();
}

void flush_dcache_page_all(struct mm_struct *mm, struct page *page)
{
	void *pg_addr;
	u64 data0;

	if (tlb_type == hypervisor)
		return;

	preempt_disable();

#ifdef CONFIG_DEBUG_DCFLUSH
	atomic_inc(&dcpage_flushes);
#endif
	data0 = 0;
	pg_addr = page_address(page);
	if (tlb_type == spitfire) {
		data0 = ((u64)&xcall_flush_dcache_page_spitfire);
		if (page_mapping(page) != NULL)
			data0 |= ((u64)1 << 32);
	} else if (tlb_type == cheetah || tlb_type == cheetah_plus) {
#ifdef DCACHE_ALIASING_POSSIBLE
		data0 = ((u64)&xcall_flush_dcache_page_cheetah);
#endif
	}
	if (data0) {
		xcall_deliver(data0, __pa(pg_addr),
			      (u64) pg_addr, cpu_online_mask);
#ifdef CONFIG_DEBUG_DCFLUSH
		atomic_inc(&dcpage_flushes_xcall);
#endif
	}
	__local_flush_dcache_page(page);

	preempt_enable();
}

void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
{
	struct mm_struct *mm;
	unsigned long flags;

	clear_softint(1 << irq);

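	/* See if we need to allocate a new TLB context because
	 * the version of the one we are using is now out of date.
	 */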
	mm = current->active_mm;
	if (unlikely(!mm || (mm == &init_mm)))
		return;

	spin_lock_irqsave(&mm->context.lock, flags);

	if (unlikely(!CTX_VALID(mm->context)))
		get_new_mmu_context(mm);

	spin_unlock_irqrestore(&mm->context.lock, flags);

	load_secondary_context(mm);
	__flush_tlb_mm(CTX_HWBITS(mm->context),
		       SECONDARY_CONTEXT);
}

void smp_new_mmu_context_version(void)
{
	smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
}

#ifdef CONFIG_KGDB
void kgdb_roundup_cpus(unsigned long flags)
{
	smp_cross_call(&xcall_kgdb_capture, 0, 0, 0);
}
#endif

void smp_fetch_global_regs(void)
{
	smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0);
}

void smp_fetch_global_pmu(void)
{
	if (tlb_type == hypervisor &&
	    sun4v_chip_type >= SUN4V_CHIP_NIAGARA4)
		smp_cross_call(&xcall_fetch_glob_pmu_n4, 0, 0, 0);
	else
		smp_cross_call(&xcall_fetch_glob_pmu, 0, 0, 0);
}

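/* We know that the window frames of the user have been flushed
 * to the stack before we get here because all callers of us
 * are flush_tlb_*() routines, and these run after flush_cache_*()
 * which performs the flushw.
 *
 * The SMP TLB coherency scheme we use works as follows:
 *
 * 1) mm->cpu_vm_mask is a bit mask of which cpus an address
 *    space has (potentially) executed on, this is the heuristic
 *    we use to avoid doing cross calls.
 *
 *    Also, for flushing from kswapd and also for clones, we
 *    use cpu_vm_mask as the list of cpus to make run the TLB.
 *
 * 2) TLB context numbers are shared globally across all processors
 *    in the system, this allows us to play several games to avoid
 *    cross calls.
 *
 *    One invariant is that when a cpu switches to a process, and
 *    that processes tsk->active_mm->cpu_vm_mask does not have the
 *    current cpu's bit set, that tlb context is flushed from the
 *    cpu.
 */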
void smp_flush_tlb_mm(struct mm_struct *mm)
{
	u32 ctx = CTX_HWBITS(mm->context);
	int cpu = get_cpu();

	if (atomic_read(&mm->mm_users) == 1) {
		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
		goto local_flush_and_out;
	}

	smp_cross_call_masked(&xcall_flush_tlb_mm,
			      ctx, 0, 0,
			      mm_cpumask(mm));

local_flush_and_out:
	__flush_tlb_mm(ctx, SECONDARY_CONTEXT);

	put_cpu();
}

struct tlb_pending_info {
	unsigned long ctx;
	unsigned long nr;
	unsigned long *vaddrs;
};

static void tlb_pending_func(void *info)
{
	struct tlb_pending_info *t = info;

	__flush_tlb_pending(t->ctx, t->nr, t->vaddrs);
}

void smp_flush_tlb_pending(struct mm_struct *mm, unsigned long nr, unsigned long *vaddrs)
{
	u32 ctx = CTX_HWBITS(mm->context);
	struct tlb_pending_info info;
	int cpu = get_cpu();

	info.ctx = ctx;
	info.nr = nr;
	info.vaddrs = vaddrs;

	if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
	else
		smp_call_function_many(mm_cpumask(mm), tlb_pending_func,
				       &info, 1);

	__flush_tlb_pending(ctx, nr, vaddrs);

	put_cpu();
}

void smp_flush_tlb_page(struct mm_struct *mm, unsigned long vaddr)
{
	unsigned long context = CTX_HWBITS(mm->context);
	int cpu = get_cpu();

	if (mm == current->mm && atomic_read(&mm->mm_users) == 1)
		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
	else
		smp_cross_call_masked(&xcall_flush_tlb_page,
				      context, vaddr, 0,
				      mm_cpumask(mm));
	__flush_tlb_page(context, vaddr);

	put_cpu();
}

void smp_flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	start &= PAGE_MASK;
	end    = PAGE_ALIGN(end);
	if (start != end) {
		smp_cross_call(&xcall_flush_tlb_kernel_range,
			       0, start, end);

		__flush_tlb_kernel_range(start, end);
	}
}

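/* CPU capture. */
/* #define CAPTURE_DEBUG */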
extern unsigned long xcall_capture;

static atomic_t smp_capture_depth = ATOMIC_INIT(0);
static atomic_t smp_capture_registry = ATOMIC_INIT(0);
static unsigned long penguins_are_doing_time;

void smp_capture(void)
{
	int result = atomic_add_return(1, &smp_capture_depth);

	if (result == 1) {
		int ncpus = num_online_cpus();

#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Sending penguins to jail...",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 1;
		atomic_inc(&smp_capture_registry);
		smp_cross_call(&xcall_capture, 0, 0, 0);
		while (atomic_read(&smp_capture_registry) != ncpus)
			rmb();
#ifdef CAPTURE_DEBUG
		printk("done\n");
#endif
	}
}

void smp_release(void)
{
	if (atomic_dec_and_test(&smp_capture_depth)) {
#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Giving pardon to "
		       "imprisoned penguins\n",
		       smp_processor_id());
#endif
		penguins_are_doing_time = 0;
		membar_safe("#StoreLoad");
		atomic_dec(&smp_capture_registry);
	}
}

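/* Imprisoned penguins run with %pil == PIL_NORMAL_MAX, but PSTATE_IE
 * set, so they can service tlb flush xcalls...
 */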
extern void prom_world(int);

void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);

	preempt_disable();

	__asm__ __volatile__("flushw");
	prom_world(1);
	atomic_inc(&smp_capture_registry);
	membar_safe("#StoreLoad");
	while (penguins_are_doing_time)
		rmb();
	atomic_dec(&smp_capture_registry);
	prom_world(0);

	preempt_enable();
}

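/* /proc/profile writes can call this, don't __init it please. */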
int setup_profiling_timer(unsigned int multiplier)
{
	return -EINVAL;
}

void __init smp_prepare_cpus(unsigned int max_cpus)
{
}

void smp_prepare_boot_cpu(void)
{
}

void __init smp_setup_processor_id(void)
{
	if (tlb_type == spitfire)
		xcall_deliver_impl = spitfire_xcall_deliver;
	else if (tlb_type == cheetah || tlb_type == cheetah_plus)
		xcall_deliver_impl = cheetah_xcall_deliver;
	else
		xcall_deliver_impl = hypervisor_xcall_deliver;
}

void __init smp_fill_in_cpu_possible_map(void)
{
	int possible_cpus = num_possible_cpus();
	int i;

	if (possible_cpus > nr_cpu_ids)
		possible_cpus = nr_cpu_ids;

	for (i = 0; i < possible_cpus; i++)
		set_cpu_possible(i, true);
	for (; i < NR_CPUS; i++)
		set_cpu_possible(i, false);
}

void smp_fill_in_sib_core_maps(void)
{
	unsigned int i;

	for_each_present_cpu(i) {
		unsigned int j;

		cpumask_clear(&cpu_core_map[i]);
		if (cpu_data(i).core_id == 0) {
			cpumask_set_cpu(i, &cpu_core_map[i]);
			continue;
		}

		for_each_present_cpu(j) {
			if (cpu_data(i).core_id ==
			    cpu_data(j).core_id)
				cpumask_set_cpu(j, &cpu_core_map[i]);
		}
	}

	for_each_present_cpu(i) {
		unsigned int j;

		for_each_present_cpu(j) {
			if (cpu_data(i).max_cache_id ==
			    cpu_data(j).max_cache_id)
				cpumask_set_cpu(j, &cpu_core_sib_cache_map[i]);

			if (cpu_data(i).sock_id == cpu_data(j).sock_id)
				cpumask_set_cpu(j, &cpu_core_sib_map[i]);
		}
	}

	for_each_present_cpu(i) {
		unsigned int j;

		cpumask_clear(&per_cpu(cpu_sibling_map, i));
		if (cpu_data(i).proc_id == -1) {
			cpumask_set_cpu(i, &per_cpu(cpu_sibling_map, i));
			continue;
		}

		for_each_present_cpu(j) {
			if (cpu_data(i).proc_id ==
			    cpu_data(j).proc_id)
				cpumask_set_cpu(j, &per_cpu(cpu_sibling_map, i));
		}
	}
}

int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
	int ret = smp_boot_one_cpu(cpu, tidle);

	if (!ret) {
		cpumask_set_cpu(cpu, &smp_commenced_mask);
		while (!cpu_online(cpu))
			mb();
		if (!cpu_online(cpu)) {
			ret = -ENODEV;
		} else {
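			/* On SUN4V, writes to %tick and %stick are
			 * not allowed.
			 */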
			if (tlb_type != hypervisor)
				smp_synchronize_one_tick(cpu);
		}
	}
	return ret;
}

#ifdef CONFIG_HOTPLUG_CPU
void cpu_play_dead(void)
{
	int cpu = smp_processor_id();
	unsigned long pstate;

	idle_task_exit();

	if (tlb_type == hypervisor) {
		struct trap_per_cpu *tb = &trap_block[cpu];

		sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO,
				tb->cpu_mondo_pa, 0);
		sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO,
				tb->dev_mondo_pa, 0);
		sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR,
				tb->resum_mondo_pa, 0);
		sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR,
				tb->nonresum_mondo_pa, 0);
	}

	cpumask_clear_cpu(cpu, &smp_commenced_mask);
	membar_safe("#Sync");

	local_irq_disable();

	__asm__ __volatile__(
		"rdpr	%%pstate, %0\n\t"
		"wrpr	%0, %1, %%pstate"
		: "=r" (pstate)
		: "i" (PSTATE_IE));

	while (1)
		barrier();
}

int __cpu_disable(void)
{
	int cpu = smp_processor_id();
	cpuinfo_sparc *c;
	int i;

	for_each_cpu(i, &cpu_core_map[cpu])
		cpumask_clear_cpu(cpu, &cpu_core_map[i]);
	cpumask_clear(&cpu_core_map[cpu]);

	for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu))
		cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i));
	cpumask_clear(&per_cpu(cpu_sibling_map, cpu));

	c = &cpu_data(cpu);

	c->core_id = 0;
	c->proc_id = -1;

	smp_wmb();

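	/* Make sure no interrupts point to this cpu.  */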
	fixup_irqs();

	local_irq_enable();
	mdelay(1);
	local_irq_disable();

	set_cpu_online(cpu, false);

	cpu_map_rebuild();

	return 0;
}

void __cpu_die(unsigned int cpu)
{
	int i;

	for (i = 0; i < 100; i++) {
		smp_rmb();
		if (!cpumask_test_cpu(cpu, &smp_commenced_mask))
			break;
		msleep(100);
	}
	if (cpumask_test_cpu(cpu, &smp_commenced_mask)) {
		printk(KERN_ERR "CPU %u didn't die...\n", cpu);
	} else {
#if defined(CONFIG_SUN_LDOMS)
		unsigned long hv_err;
		int limit = 100;

		do {
			hv_err = sun4v_cpu_stop(cpu);
			if (hv_err == HV_EOK) {
				set_cpu_present(cpu, false);
				break;
			}
		} while (--limit > 0);
		if (limit <= 0) {
			printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n",
			       hv_err);
		}
#endif
	}
}
#endif

void __init smp_cpus_done(unsigned int max_cpus)
{
}

void smp_send_reschedule(int cpu)
{
	if (cpu == smp_processor_id()) {
		WARN_ON_ONCE(preemptible());
		set_softint(1 << PIL_SMP_RECEIVE_SIGNAL);
	} else {
		xcall_deliver((u64) &xcall_receive_signal,
			      0, 0, cpumask_of(cpu));
	}
}

void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
{
	clear_softint(1 << irq);
	scheduler_ipi();
}

static void stop_this_cpu(void *dummy)
{
	prom_stopself();
}

void smp_send_stop(void)
{
	int cpu;

	if (tlb_type == hypervisor) {
		for_each_online_cpu(cpu) {
			if (cpu == smp_processor_id())
				continue;
#ifdef CONFIG_SUN_LDOMS
			if (ldom_domaining_enabled) {
				unsigned long hv_err;
				hv_err = sun4v_cpu_stop(cpu);
				if (hv_err)
					printk(KERN_ERR "sun4v_cpu_stop() "
					       "failed err=%lu\n", hv_err);
			} else
#endif
				prom_stopcpu_cpuid(cpu);
		}
	} else
		smp_call_function(stop_this_cpu, NULL, 0);
}

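/**
 * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
 * @cpu: cpu to allocate for
 * @size: size allocation in bytes
 * @align: alignment
 *
 * Allocate @size bytes aligned at @align for cpu @cpu.  This wrapper
 * does the right thing for NUMA regardless of the current
 * configuration.
 *
 * RETURNS:
 * Pointer to the allocated area on success, NULL on failure.
 */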
static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
					size_t align)
{
	const unsigned long goal = __pa(MAX_DMA_ADDRESS);
#ifdef CONFIG_NEED_MULTIPLE_NODES
	int node = cpu_to_node(cpu);
	void *ptr;

	if (!node_online(node) || !NODE_DATA(node)) {
		ptr = __alloc_bootmem(size, align, goal);
		pr_info("cpu %d has no node %d or node-local memory\n",
			cpu, node);
		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
			 cpu, size, __pa(ptr));
	} else {
		ptr = __alloc_bootmem_node(NODE_DATA(node),
					   size, align, goal);
		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
			 "%016lx\n", cpu, size, node, __pa(ptr));
	}
	return ptr;
#else
	return __alloc_bootmem(size, align, goal);
#endif
}

static void __init pcpu_free_bootmem(void *ptr, size_t size)
{
	free_bootmem(__pa(ptr), size);
}

static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
	if (cpu_to_node(from) == cpu_to_node(to))
		return LOCAL_DISTANCE;
	else
		return REMOTE_DISTANCE;
}

static void __init pcpu_populate_pte(unsigned long addr)
{
	pgd_t *pgd = pgd_offset_k(addr);
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd)) {
		pud_t *new;

		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
		pgd_populate(&init_mm, pgd, new);
	}

	pud = pud_offset(pgd, addr);
	if (pud_none(*pud)) {
		pmd_t *new;

		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
		pud_populate(&init_mm, pud, new);
	}

	pmd = pmd_offset(pud, addr);
	if (!pmd_present(*pmd)) {
		pte_t *new;

		new = __alloc_bootmem(PAGE_SIZE, PAGE_SIZE, PAGE_SIZE);
		pmd_populate_kernel(&init_mm, pmd, new);
	}
}

void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc = -EINVAL;

	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
					    PERCPU_DYNAMIC_RESERVE, 4 << 20,
					    pcpu_cpu_distance,
					    pcpu_alloc_bootmem,
					    pcpu_free_bootmem);
		if (rc)
			pr_warning("PERCPU: %s allocator failed (%d), "
				   "falling back to page size\n",
				   pcpu_fc_names[pcpu_chosen_fc], rc);
	}
	if (rc < 0)
		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
					   pcpu_alloc_bootmem,
					   pcpu_free_bootmem,
					   pcpu_populate_pte);
	if (rc < 0)
		panic("cannot initialize percpu area (err=%d)", rc);

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];

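	/* Setup %g5 for the boot cpu.  */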
	__local_per_cpu_offset = __per_cpu_offset(smp_processor_id());

	of_fill_in_cpu_data();
	if (tlb_type == hypervisor)
		mdesc_fill_in_cpu_data(cpu_all_mask);
}