1
2
3
4
5
6
7
8#include <linux/linkage.h>
9#include <asm/blackfin.h>
10#include <asm/cache.h>
11#include <asm/asm-offsets.h>
12#include <asm/rwlock.h>
13#include <asm/cplb.h>
14
15.text
16
/*
 * coreslot_loadaddr reg
 *
 * Put the address of _corelock into pointer register \reg.  The two
 * 16-bit halves are written separately; their order does not matter.
 */
.macro coreslot_loadaddr reg:req
	\reg\().h = _corelock;
	\reg\().l = _corelock;
.endm
21
/*
 * safe_testset addr, scratch
 *
 * Issue TESTSET on the location addressed by pointer register \addr.
 * Callers branch on CC right after this macro to learn whether the
 * lock byte was obtained.
 *
 * When the TESTSET anomaly workaround is enabled, the instruction is
 * bracketed by cli/sti so that it runs with interrupts masked; \scratch
 * receives the saved interrupt mask and is therefore clobbered in that
 * configuration only.
 *
 * NOTE(review): the opening conditional was missing, leaving #else and
 * #endif unbalanced.  ANOMALY_05000477 is assumed to be the intended
 * guard and to be supplied by the headers included above -- confirm.
 */
.macro safe_testset addr:req, scratch:req
#if ANOMALY_05000477
	cli \scratch;
	testset (\addr);
	sti \scratch;
#else
	testset (\addr);
#endif
.endm
31
32
33
34
35
36
37
38
39
/*
 * _get_core_lock - take the inter-core lock with interrupts masked.
 *
 * In:   r0 = address of the data the caller is about to access
 * Out:  r0 = value written by "cli"; _put_core_lock later feeds it to "sti"
 * Uses: r1, r2, p0, cc
 *
 * After the lock is obtained, the cache line containing the input
 * address is flushed and invalidated before returning.
 */
ENTRY(_get_core_lock)
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;			/* r1 = address rounded down by L1_CACHE_BYTES */
	cli r0;				/* mask interrupts, keep the old mask in r0 */
	coreslot_loadaddr p0;		/* p0 -> _corelock */
.Lcorelock_spin:
	safe_testset p0, r2;
	if !cc jump .Lcorelock_backoff;	/* not obtained: sync and try again */

	/* Lock held: drop any cached copy of the caller's line. */
	p0 = r1;
	CSYNC(r2);
	flushinv[p0];
	SSYNC(r2);
	rts;

.Lcorelock_backoff:
	SSYNC(r2);
	jump .Lcorelock_spin;
ENDPROC(_get_core_lock)
57
58
59
60
61
62
63
64
/*
 * _get_core_lock_noflush - take the inter-core lock with interrupts
 * masked, without touching the data cache.
 *
 * Out:  r0 = value written by "cli"; _put_core_lock later feeds it to "sti"
 * Uses: r2, p0, cc
 */
ENTRY(_get_core_lock_noflush)
	cli r0;				/* mask interrupts, keep the old mask in r0 */
	coreslot_loadaddr p0;		/* p0 -> _corelock */
.Lcorelock_nf_spin:
	safe_testset p0, r2;
	if !cc jump .Lcorelock_nf_backoff;
	rts;				/* lock obtained */

.Lcorelock_nf_backoff:
	SSYNC(r2);
	jump .Lcorelock_nf_spin;
ENDPROC(_get_core_lock_noflush)
76
77
78
79
80
81
82
83
/*
 * _put_core_lock - release the inter-core lock and restore interrupts.
 *
 * In:   r0 = interrupt mask previously returned by _get_core_lock or
 *            _get_core_lock_noflush
 * Uses: r1, r2, p0
 */
ENTRY(_put_core_lock)
	r1 = 0;
	coreslot_loadaddr p0;		/* p0 -> _corelock */
	[p0] = r1;			/* writing zero releases the lock word */
	SSYNC(r2);
	sti r0;				/* restore the caller's interrupt mask */
	rts;
ENDPROC(_put_core_lock)
93
94#ifdef __ARCH_SYNC_CORE_DCACHE
95
/*
 * ___raw_smp_mark_barrier_asm
 *
 * Under the core lock, rewrite _barrier_mask so that every bit other
 * than the calling core's is set, while the calling core's own bit
 * keeps whatever value it had.
 *
 * rets, r7:5, r0, p1 and p0 are saved on the stack and restored before
 * returning; r1 and r2 are used as scratch by the lock helpers.
 */
ENTRY(___raw_smp_mark_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/* r6 = single-bit mask for this core (r7 is used as the bit index). */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/* r5 = current state of this core's bit in the barrier mask. */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;

	/* New mask: all other bits set; re-add our bit only if it was set. */
	r7 = ~r6;
	cc = r5 == 0;
	if cc jump .Lmark_store;
	r7 = r7 | r6;
.Lmark_store:
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;

	/* Unwind in the reverse order of the saves above. */
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_mark_barrier_asm)
134
/*
 * ___raw_smp_check_barrier_asm
 *
 * Under the core lock, test the calling core's bit in _barrier_mask.
 * If it is set, clear it, release the lock and call
 * _resync_core_dcache; otherwise just release the lock.
 *
 * rets, r7:5, r0, p1 and p0 are saved on the stack and restored before
 * returning.
 */
ENTRY(___raw_smp_check_barrier_asm)
	[--sp] = rets;
	[--sp] = ( r7:5 );
	[--sp] = r0;
	[--sp] = p1;
	[--sp] = p0;
	call _get_core_lock_noflush;

	/* r6 = single-bit mask for this core (r7 is used as the bit index). */
	GET_CPUID(p1, r7);
	r6 = 1;
	r6 <<= r7;

	/* Is our bit pending in the barrier mask? */
	p1.l = _barrier_mask;
	p1.h = _barrier_mask;
	r7 = [p1];
	r5 = r7 & r6;
	cc = r5 == 0;
	if cc jump .Lcheck_not_pending;

	/* Pending: clear our bit and publish the new mask. */
	r6 = ~r6;
	r7 = r7 & r6;
	[p1] = r7;
	SSYNC(r2);

	call _put_core_lock;

	/*
	 * 12 bytes of stack are reserved around the call -- presumably the
	 * C calling convention's argument area; confirm against the ABI.
	 */
	sp += -12;
	call _resync_core_dcache;
	sp += 12;
	jump .Lcheck_restore;

.Lcheck_not_pending:
	call _put_core_lock;

.Lcheck_restore:
	p0 = [sp++];
	p1 = [sp++];
	r0 = [sp++];
	( r7:5 ) = [sp++];
	rets = [sp++];
	rts;
ENDPROC(___raw_smp_check_barrier_asm)
183
184
185
186
187
188
189
/*
 * _start_lock_coherent
 *
 * In:   r0 = saved interrupt mask (restored with "sti" on exit)
 *       r1 = address of the lock word
 * Uses: r1, r2, p0, p1, cc (r7:6 and rets are saved and restored)
 *
 * Entered with the core lock held.  Reads the marks kept in bits 31:28
 * of the lock word, clears them, releases the core lock, and calls
 * _resync_core_dcache if a core other than the current one had left a
 * mark.  Interrupts stay masked until the final "sti".
 */
_start_lock_coherent:

	[--sp] = rets;
	[--sp] = ( r7:6 );
	p1 = r1;			/* p1 -> lock word */
	r7 = r0;			/* keep the interrupt mask across the call below */

	/* r2 = every bit except the current core's. */
	GET_CPUID(p0, r2);
	r1 = 1;
	r1 <<= r2;
	r2 = ~r1;

	/* r6 = marks in the top nibble that belong to other cores. */
	r1 = [p1];
	r1 >>= 28;
	r6 = r1 & r2;

	/* Strip the top nibble from the lock word and write it back. */
	r1 = [p1];
	r1 <<= 4;
	r1 >>= 4;
	[p1] = r1;

	/* Release the core lock; interrupts are still masked here. */
	coreslot_loadaddr p0;
	r1 = 0;
	[p0] = r1;

	/* Nothing to resynchronise when no other core left a mark. */
	cc = r6 == 0;
	if cc jump .Lcoherent_exit;

	/*
	 * 12 bytes of stack are reserved around the call -- presumably the
	 * C calling convention's argument area; confirm against the ABI.
	 */
	sp += -12;
	call _resync_core_dcache;
	sp += 12;

.Lcoherent_exit:
	SSYNC(r2);
	sti r7;
	( r7:6 ) = [sp++];
	rets = [sp++];
	rts;
243
244
245
246
247
248
249
/*
 * _end_lock_coherent
 *
 * In:   r0 = saved interrupt mask (consumed by _put_core_lock)
 *       r1 = address of the lock word
 * Uses: r1, r2, p0, p1
 *
 * Sets bit (28 + value produced by GET_CPUID) in the lock word, then
 * tail-jumps to _put_core_lock, which returns to our caller.
 */
_end_lock_coherent:

	p1 = r1;			/* p1 -> lock word */
	GET_CPUID(p0, r2);
	r2 += 28;			/* bit position inside the top nibble */
	r1 = 1;
	r1 <<= r2;
	r2 = [p1];
	r2 = r1 | r2;
	[p1] = r2;			/* leave this core's mark in the lock word */
	r1 = p1;
	jump _put_core_lock;
262
263#endif
264
265
266
267
268
269
/*
 * ___raw_spin_is_locked_asm
 *
 * In:   r0 = address of the spinlock word
 * Out:  r0 = bit 0 of the lock word (1 or 0), sampled under the core lock
 * Uses: r1, r2, r3, p0, p1, cc
 */
ENTRY(___raw_spin_is_locked_asm)
	[--sp] = rets;
	p1 = r0;			/* p1 -> lock word */
	call _get_core_lock;
	r3 = [p1];
	cc = bittst( r3, 0 );
	r3 = cc;			/* r3 survives _put_core_lock */
	r1 = p1;
	call _put_core_lock;
	r0 = r3;
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_is_locked_asm)
283
284
285
286
287
288
/*
 * ___raw_spin_lock_asm
 *
 * In:   r0 = address of the spinlock word
 * Uses: r0-r3, p0, p1, cc
 *
 * Repeats until bit 0 of the lock word is seen clear while the core
 * lock is held, then marks the spinlock taken and returns.
 */
ENTRY(___raw_spin_lock_asm)
	p1 = r0;			/* p1 -> lock word */
	[--sp] = rets;
.Lspin_acquire:
	call _get_core_lock;
	r2 = [p1];
	r1 = p1;
	cc = bittst( r2, 0 );
	if cc jump .Lspin_contended;	/* already held by someone else */
#ifdef __ARCH_SYNC_CORE_DCACHE
	r3 = p1;
	bitset ( r2, 0 );		/* set bit 0, keep the other bits */
	[p1] = r2;
	call _start_lock_coherent;	/* also releases the core lock */
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lspin_contended:
	/* The lock word is left untouched on this path. */
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;			/* _get_core_lock expects the address in r0 */
	jump .Lspin_acquire;
ENDPROC(___raw_spin_lock_asm)
319
320
321
322
323
324
/*
 * ___raw_spin_trylock_asm
 *
 * In:   r0 = address of the spinlock word
 * Out:  r0 = 1 if the spinlock was taken, 0 if bit 0 was already set
 * Uses: r1, r2, r3, p0, p1, cc
 */
ENTRY(___raw_spin_trylock_asm)
	[--sp] = rets;
	p1 = r0;			/* p1 -> lock word */
	call _get_core_lock;
	r3 = [p1];
	r1 = p1;
	cc = bittst( r3, 0 );
	if cc jump .Lspin_try_busy;
#ifdef __ARCH_SYNC_CORE_DCACHE
	bitset ( r3, 0 );		/* set bit 0, keep the other bits */
	[p1] = r3;
	call _start_lock_coherent;	/* also releases the core lock */
#else
	r2 = 1;
	[p1] = r2;
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;

.Lspin_try_busy:
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_spin_trylock_asm)
351
352
353
354
355
356
/*
 * ___raw_spin_unlock_asm
 *
 * In:   r0 = address of the spinlock word
 * Uses: r0-r2, p0, p1
 *
 * Clears bit 0 of the lock word while holding the core lock.
 */
ENTRY(___raw_spin_unlock_asm)
	[--sp] = rets;
	p1 = r0;			/* p1 -> lock word */
	call _get_core_lock;
	r1 = p1;
	r2 = [p1];
	bitclr ( r2, 0 );
	[p1] = r2;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent;	/* marks the word, then releases the core lock */
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_spin_unlock_asm)
373
374
375
376
377
378
/*
 * ___raw_read_lock_asm
 *
 * In:   r0 = address of the rwlock word
 * Uses: r0-r2, p0, p1, cc
 *
 * Decrements the lock word under the core lock.  A negative result is
 * undone and the routine waits (dropping and retaking the core lock
 * each round) until the word reads at least 2 before trying again.
 */
ENTRY(___raw_read_lock_asm)
	[--sp] = rets;
	p1 = r0;			/* p1 -> lock word */
	call _get_core_lock;
.Lrd_attempt:
	r1 = [p1];
	r1 += -1;
	[p1] = r1;
	cc = r1 < 0;
	if cc jump .Lrd_undo;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent;	/* also releases the core lock */
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lrd_undo:
	r1 += 1;			/* take the decrement back */
	[p1] = r1;
.Lrd_backoff:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;			/* _get_core_lock expects the address in r0 */
	call _get_core_lock;
	r1 = [p1];
	cc = r1 < 2;
	if cc jump .Lrd_backoff;
	jump .Lrd_attempt;
ENDPROC(___raw_read_lock_asm)
412
413
414
415
416
417
/*
 * ___raw_read_trylock_asm
 *
 * In:   r0 = address of the rwlock word
 * Out:  r0 = 1 if the word was positive and has been decremented,
 *            0 if it was zero or negative (word left unchanged)
 * Uses: r1, r2, p0, p1, cc
 */
ENTRY(___raw_read_trylock_asm)
	[--sp] = rets;
	p1 = r0;			/* p1 -> lock word */
	call _get_core_lock;
	r1 = [p1];
	cc = r1 <= 0;
	if cc jump .Lrd_try_busy;
	r1 += -1;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent;	/* also releases the core lock */
#else
	call _put_core_lock;
#endif
	r0 = 1;
	rets = [sp++];
	rts;

.Lrd_try_busy:
	r1 = p1;
	call _put_core_lock;
	r0 = 0;
	rets = [sp++];
	rts;
ENDPROC(___raw_read_trylock_asm)
443
444
445
446
447
448
449
450
451
452
/*
 * ___raw_read_unlock_asm
 *
 * In:   r0 = address of the rwlock word
 * Uses: r0-r2, p0, p1
 *
 * Increments the lock word under the core lock.  This path always
 * releases through _put_core_lock, in both build configurations.
 */
ENTRY(___raw_read_unlock_asm)
	[--sp] = rets;
	p1 = r0;			/* p1 -> lock word */
	call _get_core_lock;
	r1 = [p1];
	r1 += 1;
	[p1] = r1;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	rts;
ENDPROC(___raw_read_unlock_asm)
465
466
467
468
469
470
/*
 * ___raw_write_lock_asm
 *
 * In:   r0 = address of the rwlock word
 * Uses: r0-r3, p0, p1, cc
 *
 * Subtracts RW_LOCK_BIAS from the lock word under the core lock.  The
 * lock is taken when the result is zero (with __ARCH_SYNC_CORE_DCACHE
 * only the low 28 bits are tested).  Otherwise nothing is stored and
 * the routine waits, dropping and retaking the core lock each round,
 * until the word equals RW_LOCK_BIAS again.
 */
ENTRY(___raw_write_lock_asm)
	[--sp] = rets;
	r3.h = hi(RW_LOCK_BIAS);
	r3.l = lo(RW_LOCK_BIAS);	/* r3 = bias; survives the lock helpers */
	p1 = r0;			/* p1 -> lock word */
	call _get_core_lock;
.Lwr_attempt:
	r1 = [p1];
	r1 = r1 - r3;
#ifdef __ARCH_SYNC_CORE_DCACHE
	/* Ignore bits 31:28 when testing for zero. */
	r2 = r1;
	r2 <<= 4;
	r2 >>= 4;
	cc = r2 == 0;
#else
	cc = r1 == 0;
#endif
	if !cc jump .Lwr_backoff;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent;	/* also releases the core lock */
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;

.Lwr_backoff:
	r1 = p1;
	call _put_core_lock;
	SSYNC(r2);
	r0 = p1;			/* _get_core_lock expects the address in r0 */
	call _get_core_lock;
	r1 = [p1];
#ifdef __ARCH_SYNC_CORE_DCACHE
	r1 <<= 4;
	r1 >>= 4;
#endif
	cc = r1 == r3;
	if !cc jump .Lwr_backoff;
	jump .Lwr_attempt;
ENDPROC(___raw_write_lock_asm)
514
515
516
517
518
519
/*
 * ___raw_write_trylock_asm
 *
 * In:   r0 = address of the rwlock word
 * Out:  r0 = 1 if the write lock was taken, 0 otherwise
 * Uses: r1, r2, r3, p0, p1, cc
 *
 * The lock is free when its count equals RW_LOCK_BIAS.  With
 * __ARCH_SYNC_CORE_DCACHE, bits 31:28 of the word carry per-core marks
 * (set by _end_lock_coherent, consumed by _start_lock_coherent), so the
 * comparison is done on the low 28 bits only, the same way
 * ___raw_write_lock_asm tests the word.  Comparing the raw word would
 * make the trylock fail whenever a mark is present, even though the
 * lock is free.
 */
ENTRY(___raw_write_trylock_asm)
	p1 = r0;			/* p1 -> lock word */
	[--sp] = rets;
	call _get_core_lock;
	r1 = [p1];
	r2.l = lo(RW_LOCK_BIAS);
	r2.h = hi(RW_LOCK_BIAS);
#ifdef __ARCH_SYNC_CORE_DCACHE
	/* r3 = lock count with the mark nibble stripped off. */
	r3 = r1;
	r3 <<= 4;
	r3 >>= 4;
	cc = r3 == r2;
#else
	cc = r1 == r2;
#endif
	if !cc jump .Lfailed_trywrlock;
#ifdef __ARCH_SYNC_CORE_DCACHE
	/* Count becomes zero; keep the marks for _start_lock_coherent. */
	r1 >>= 28;
	r1 <<= 28;
#else
	r1 = 0;
#endif
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _start_lock_coherent;	/* also releases the core lock */
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	r0 = 1;
	rts;

.Lfailed_trywrlock:
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = 0;
	rts;
ENDPROC(___raw_write_trylock_asm)
553
554
555
556
557
558
/*
 * ___raw_write_unlock_asm
 *
 * In:   r0 = address of the rwlock word
 * Uses: r0-r3, p0, p1
 *
 * Adds RW_LOCK_BIAS back to the lock word while holding the core lock.
 */
ENTRY(___raw_write_unlock_asm)
	[--sp] = rets;
	r3.h = hi(RW_LOCK_BIAS);
	r3.l = lo(RW_LOCK_BIAS);	/* r3 = bias; survives _get_core_lock */
	p1 = r0;			/* p1 -> lock word */
	call _get_core_lock;
	r1 = [p1];
	r1 = r1 + r3;
	[p1] = r1;
	r1 = p1;
#ifdef __ARCH_SYNC_CORE_DCACHE
	call _end_lock_coherent;	/* marks the word, then releases the core lock */
#else
	call _put_core_lock;
#endif
	rets = [sp++];
	rts;
ENDPROC(___raw_write_unlock_asm)
577
578
579
580
581
582
583
584
/*
 * ___raw_atomic_update_asm
 *
 * In:   r0 = address of a 32-bit word
 *       r1 = value to add
 * Out:  r0 = the sum that was stored (the new value)
 * Uses: r1, r2, r3, p0, p1
 */
ENTRY(___raw_atomic_update_asm)
	[--sp] = rets;
	r3 = r1;			/* r1 is scratch for the lock helpers */
	p1 = r0;			/* p1 -> word */
	call _get_core_lock;
	r2 = [p1];
	r3 = r3 + r2;
	[p1] = r3;
	r1 = p1;
	call _put_core_lock;
	rets = [sp++];
	r0 = r3;
	rts;
ENDPROC(___raw_atomic_update_asm)
599
600
601
602
603
604
605
606
607
/*
 * ___raw_atomic_clear_asm
 *
 * In:   r0 = address of a 32-bit word
 *       r1 = mask of bits to clear
 * Out:  r0 = value of the word before the update
 * Uses: r1, r2, r3, p0, p1
 */
ENTRY(___raw_atomic_clear_asm)
	[--sp] = rets;
	r3 = ~r1;			/* keep-mask; r1 is scratch for the helpers */
	p1 = r0;			/* p1 -> word */
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 & r3;
	[p1] = r3;
	r1 = p1;
	r3 = r2;			/* old value survives _put_core_lock in r3 */
	call _put_core_lock;
	rets = [sp++];
	r0 = r3;
	rts;
ENDPROC(___raw_atomic_clear_asm)
623
624
625
626
627
628
629
630
631
/*
 * ___raw_atomic_set_asm
 *
 * In:   r0 = address of a 32-bit word
 *       r1 = mask of bits to set
 * Out:  r0 = value of the word before the update
 * Uses: r1, r2, r3, p0, p1
 */
ENTRY(___raw_atomic_set_asm)
	[--sp] = rets;
	r3 = r1;			/* r1 is scratch for the lock helpers */
	p1 = r0;			/* p1 -> word */
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 | r3;
	[p1] = r3;
	r1 = p1;
	r3 = r2;			/* old value survives _put_core_lock in r3 */
	call _put_core_lock;
	rets = [sp++];
	r0 = r3;
	rts;
ENDPROC(___raw_atomic_set_asm)
647
648
649
650
651
652
653
654
655
/*
 * ___raw_atomic_xor_asm
 *
 * In:   r0 = address of a 32-bit word
 *       r1 = mask of bits to toggle
 * Out:  r0 = value of the word before the update
 * Uses: r1, r2, r3, p0, p1
 */
ENTRY(___raw_atomic_xor_asm)
	[--sp] = rets;
	r3 = r1;			/* r1 is scratch for the lock helpers */
	p1 = r0;			/* p1 -> word */
	call _get_core_lock;
	r2 = [p1];
	r3 = r2 ^ r3;
	[p1] = r3;
	r1 = p1;
	r3 = r2;			/* old value survives _put_core_lock in r3 */
	call _put_core_lock;
	rets = [sp++];
	r0 = r3;
	rts;
ENDPROC(___raw_atomic_xor_asm)
671
672
673
674
675
676
677
678
679
680
681
/*
 * ___raw_atomic_test_asm
 *
 * In:   r0 = address of a 32-bit word
 *       r1 = mask
 * Out:  r0 = word & mask, read after flushing and invalidating the
 *            cache line that contains the word
 * Uses: r1, r2, r3, p0, p1
 *
 * The core lock is not taken on this path.
 */
ENTRY(___raw_atomic_test_asm)
	r3 = r1;			/* save the mask; r1 is reused below */
	p1 = r0;			/* p1 -> word */
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;			/* address rounded down by L1_CACHE_BYTES */
	p0 = r1;
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	r0 = r0 & r3;
	rts;
ENDPROC(___raw_atomic_test_asm)
694
695
696
697
698
699
700
701
/*
 * __do_xchg(rd_op, wr_op) - body shared by the ___raw_xchg_N_asm entries.
 *
 * rd_op is the load operand and wr_op the store operand, both addressing
 * the target through p1 at the width of the entry point.
 *
 * In:   r0 = target address
 *       r1 = new value
 * Out:  r0 = previous contents as loaded by rd_op
 * Uses: r1, r2, r3, p0, p1
 */
#define __do_xchg(rd_op, wr_op) \
	p1 = r0;		/* p1 -> target */ \
	r3 = r1;		/* new value, kept across _get_core_lock */ \
	[--sp] = rets; \
	call _get_core_lock; \
	r2 = rd_op;		/* fetch the old contents */ \
	wr_op = r3;		/* store the new contents */ \
	r3 = r2;		/* old value survives _put_core_lock in r3 */ \
	r1 = p1; \
	call _put_core_lock; \
	r0 = r3; \
	rets = [sp++]; \
	rts;

/* 8-bit exchange; the old byte is zero-extended. */
ENTRY(___raw_xchg_1_asm)
	__do_xchg(b[p1] (z), b[p1])
ENDPROC(___raw_xchg_1_asm)

/* 16-bit exchange; the old halfword is zero-extended. */
ENTRY(___raw_xchg_2_asm)
	__do_xchg(w[p1] (z), w[p1])
ENDPROC(___raw_xchg_2_asm)

/* 32-bit exchange. */
ENTRY(___raw_xchg_4_asm)
	__do_xchg([p1], [p1])
ENDPROC(___raw_xchg_4_asm)
727
728
729
730
731
732
733
734
735
736
737
/*
 * __do_cmpxchg(rd_op, wr_op) - body shared by the ___raw_cmpxchg_N_asm
 * entries.
 *
 * rd_op is the load operand and wr_op the store operand, both addressing
 * the target through p1 at the width of the entry point.
 *
 * In:   r0 = target address
 *       r1 = new value
 *       r2 = expected old value
 * Out:  r0 = contents found at the target; the new value is stored only
 *            when they equal the expected value
 * Uses: r1, r2, r3, p0, p1, cc (r4 is saved and restored)
 *
 * The numeric label stays numeric because the body is expanded once per
 * entry point.
 */
#define __do_cmpxchg(rd_op, wr_op) \
	[--sp] = rets; \
	[--sp] = r4; \
	p1 = r0;		/* p1 -> target */ \
	r3 = r1;		/* new value */ \
	r4 = r2;		/* expected value; r2 is scratch for the helpers */ \
	call _get_core_lock; \
	r2 = rd_op;		/* fetch the current contents */ \
	cc = r2 == r4; \
	if !cc jump 1f;		/* mismatch: skip the store */ \
	wr_op = r3; \
     1: r3 = r2;		/* value to return, kept across _put_core_lock */ \
	r1 = p1; \
	call _put_core_lock; \
	r0 = r3; \
	r4 = [sp++]; \
	rets = [sp++]; \
	rts;

/* 8-bit compare-and-exchange; the loaded byte is zero-extended. */
ENTRY(___raw_cmpxchg_1_asm)
	__do_cmpxchg(b[p1] (z), b[p1])
ENDPROC(___raw_cmpxchg_1_asm)

/* 16-bit compare-and-exchange; the loaded halfword is zero-extended. */
ENTRY(___raw_cmpxchg_2_asm)
	__do_cmpxchg(w[p1] (z), w[p1])
ENDPROC(___raw_cmpxchg_2_asm)

/* 32-bit compare-and-exchange. */
ENTRY(___raw_cmpxchg_4_asm)
	__do_cmpxchg([p1], [p1])
ENDPROC(___raw_cmpxchg_4_asm)
768
769
770
771
772
773
774
775
/*
 * ___raw_bit_set_asm
 *
 * In:   r0 = address of a 32-bit word
 *       r1 = bit number
 * Out:  whatever ___raw_atomic_set_asm returns (tail jump)
 *
 * Turns the bit number into a one-bit mask in r1 and continues in
 * ___raw_atomic_set_asm with r0 untouched.
 */
ENTRY(___raw_bit_set_asm)
	r2 = r1;			/* r2 = shift count */
	r1 = 1;
	r1 <<= r2;			/* r1 = 1 << bit */
	jump ___raw_atomic_set_asm;
ENDPROC(___raw_bit_set_asm)
782
783
784
785
786
787
788
789
/*
 * ___raw_bit_clear_asm
 *
 * In:   r0 = address of a 32-bit word
 *       r1 = bit number
 * Out:  whatever ___raw_atomic_clear_asm returns (tail jump)
 *
 * Turns the bit number into a one-bit mask in r1 and continues in
 * ___raw_atomic_clear_asm with r0 untouched.
 */
ENTRY(___raw_bit_clear_asm)
	r2 = r1;			/* r2 = shift count */
	r1 = 1;
	r1 <<= r2;			/* r1 = 1 << bit */
	jump ___raw_atomic_clear_asm;
ENDPROC(___raw_bit_clear_asm)
796
797
798
799
800
801
802
803
/*
 * ___raw_bit_toggle_asm
 *
 * In:   r0 = address of a 32-bit word
 *       r1 = bit number
 * Out:  whatever ___raw_atomic_xor_asm returns (tail jump)
 *
 * Turns the bit number into a one-bit mask in r1 and continues in
 * ___raw_atomic_xor_asm with r0 untouched.
 */
ENTRY(___raw_bit_toggle_asm)
	r2 = r1;			/* r2 = shift count */
	r1 = 1;
	r1 <<= r2;			/* r1 = 1 << bit */
	jump ___raw_atomic_xor_asm;
ENDPROC(___raw_bit_toggle_asm)
810
811
812
813
814
815
816
817
/*
 * ___raw_bit_test_set_asm
 *
 * In:   r0 = address of a 32-bit word
 *       r1 = bit number
 * Out:  r0 = 1 if the bit was set in the value returned by
 *            ___raw_bit_set_asm, else 0
 * Uses: r1, r2, cc, plus whatever ___raw_bit_set_asm uses
 */
ENTRY(___raw_bit_test_set_asm)
	[--sp] = rets;
	[--sp] = r1;			/* the bit number is needed again afterwards */
	call ___raw_bit_set_asm;
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;			/* r2 = 1 << bit */
	r0 = r0 & r2;			/* isolate the bit in the returned value */
	cc = r0 == 0;
	if cc jump .Lbit_test_set_out;	/* r0 is already 0 */
	r0 = 1;				/* normalise non-zero to 1 */
.Lbit_test_set_out:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_set_asm)
833
834
835
836
837
838
839
840
/*
 * ___raw_bit_test_clear_asm
 *
 * In:   r0 = address of a 32-bit word
 *       r1 = bit number
 * Out:  r0 = 1 if the bit was set in the value returned by
 *            ___raw_bit_clear_asm, else 0
 * Uses: r1, r2, cc, plus whatever ___raw_bit_clear_asm uses
 */
ENTRY(___raw_bit_test_clear_asm)
	[--sp] = rets;
	[--sp] = r1;			/* the bit number is needed again afterwards */
	call ___raw_bit_clear_asm;
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;			/* r2 = 1 << bit */
	r0 = r0 & r2;			/* isolate the bit in the returned value */
	cc = r0 == 0;
	if cc jump .Lbit_test_clear_out;	/* r0 is already 0 */
	r0 = 1;				/* normalise non-zero to 1 */
.Lbit_test_clear_out:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_clear_asm)
856
857
858
859
860
861
862
863
864
/*
 * ___raw_bit_test_toggle_asm
 *
 * In:   r0 = address of a 32-bit word
 *       r1 = bit number
 * Out:  r0 = 1 if the bit was set in the value returned by
 *            ___raw_bit_toggle_asm, else 0
 * Uses: r1, r2, cc, plus whatever ___raw_bit_toggle_asm uses
 */
ENTRY(___raw_bit_test_toggle_asm)
	[--sp] = rets;
	[--sp] = r1;			/* the bit number is needed again afterwards */
	call ___raw_bit_toggle_asm;
	r1 = [sp++];
	r2 = 1;
	r2 <<= r1;			/* r2 = 1 << bit */
	r0 = r0 & r2;			/* isolate the bit in the returned value */
	cc = r0 == 0;
	if cc jump .Lbit_test_toggle_out;	/* r0 is already 0 */
	r0 = 1;				/* normalise non-zero to 1 */
.Lbit_test_toggle_out:
	rets = [sp++];
	rts;
ENDPROC(___raw_bit_test_toggle_asm)
880
881
882
883
884
885
886
887
888
889
890
/*
 * ___raw_bit_test_asm
 *
 * In:   r0 = address of a 32-bit word
 *       r1 = bit number
 * Out:  whatever ___raw_atomic_test_asm returns (tail jump)
 *
 * Turns the bit number into a one-bit mask in r1 and continues in
 * ___raw_atomic_test_asm with r0 untouched.
 */
ENTRY(___raw_bit_test_asm)
	r2 = r1;			/* r2 = shift count */
	r1 = 1;
	r1 <<= r2;			/* r1 = 1 << bit */
	jump ___raw_atomic_test_asm;
ENDPROC(___raw_bit_test_asm)
897
898
899
900
901
902
903
904
/*
 * ___raw_uncached_fetch_asm
 *
 * In:   r0 = address of a 32-bit word
 * Out:  r0 = the word, read after flushing and invalidating the cache
 *            line that contains it
 * Uses: r1, r2, p0, p1
 *
 * The core lock is not taken on this path.
 */
ENTRY(___raw_uncached_fetch_asm)
	r1 = -L1_CACHE_BYTES;
	r1 = r0 & r1;			/* address rounded down by L1_CACHE_BYTES */
	p1 = r0;			/* p1 -> word */
	p0 = r1;
	flushinv[p0];
	SSYNC(r2);
	r0 = [p1];
	rts;
ENDPROC(___raw_uncached_fetch_asm)
915