1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
/*
 * Build-time endianness guard.
 *
 * The unaligned-source paths of memcpy/memmove in this file rebuild each
 * destination word from two neighbouring aligned source words using shift
 * amounts that are only correct when the most significant byte of a word is
 * the lowest-addressed byte (big-endian).  Refuse to assemble for a
 * little-endian MicroBlaze rather than silently producing a wrong copy.
 */
#ifdef __MICROBLAZEEL__
#error These memcpy/memmove routines assume big-endian byte order and cannot be used on little-endian MicroBlaze
#endif
35
36#include <linux/linkage.h>
/*
 * memcpy / fast_memcpy_ascending
 *
 * Copies r7 bytes from the address in r6 to the address in r5, walking
 * upwards through memory.
 *
 * In:   r5 = destination, r6 = source, r7 = byte count
 * Out:  r3 = original value of r5
 * Uses: r4, r8, r9, r10, r11, r12 as scratch; r5, r6, r7 are modified
 *
 * Phases:
 *   1. fewer than 4 bytes          -> byte loop only
 *   2. byte-copy until the destination is word aligned
 *   3. 32-byte blocks (source aligned, or misaligned by 1/2/3 bytes)
 *   4. remaining whole words (same four source-alignment cases)
 *   5. remaining 0..3 bytes, one at a time
 *
 * When the source is not word aligned, each destination word is assembled
 * from two consecutive aligned source words with barrel shifts.  The shift
 * directions assume big-endian byte order.
 *
 * Instructions ending in 'd' (brid, bneid, rtsd) have a delay slot: the
 * instruction written after them executes before the branch takes effect.
 */
37 .text
38 .globl memcpy
39 .type memcpy, @function
40 .ent memcpy
41
42memcpy:
43fast_memcpy_ascending:	/* memmove also branches here for the forward case */
44
45 addi r3, r5, 0	/* r3 = d: keep the original destination */
46
47 addi r4, r0, 4	/* r4 = 4 (r0 reads as zero) */
48 cmpu r4, r4, r7	/* r4 = c - 4, negative iff c < 4 (unsigned compare) */
49 blti r4, a_xfer_end	/* less than one word: byte loop only */
50
51
	/* Copy 0..3 leading bytes so that the destination becomes word aligned. */
52 andi r4, r5, 3	/* r4 = d & 3 */
53
54 beqi r4, a_dalign_done	/* destination already aligned */
55
56 rsubi r4, r4, 4	/* r4 = 4 - r4: bytes needed to reach alignment */
57 rsub r7, r4, r7	/* c -= r4 */
58
59a_xfer_first_loop:
60
61 beqi r4, a_dalign_done	/* alignment bytes done */
62 lbui r11, r6, 0	/* r11 = *s */
63 sbi r11, r5, 0	/* *d = r11 */
64 addi r6, r6, 1	/* s++ */
65 addi r5, r5, 1	/* d++ */
66 brid a_xfer_first_loop
67 addi r4, r4, -1	/* r4-- (delay slot) */
68
69a_dalign_done:
70 addi r4, r0, 32	/* r4 = 32 */
71 cmpu r4, r4, r7	/* r4 = c - 32, negative iff c < 32 */
72
73 blti r4, a_block_done	/* not even one full 32-byte block */
74
75a_block_xfer:
76 andi r4, r7, 0xffffffe0	/* r4 = c rounded down to a multiple of 32 */
77 rsub r7, r4, r7	/* c -= r4: bytes left after the block phase */
78
79 andi r9, r6, 3	/* r9 = s & 3: source misalignment */
80
81 bnei r9, a_block_unaligned
82
	/* Source and destination both word aligned: 8 words per iteration. */
83a_block_aligned:
84 lwi r9, r6, 0
85 lwi r10, r6, 4
86 lwi r11, r6, 8
87 lwi r12, r6, 12
88 swi r9, r5, 0
89 swi r10, r5, 4
90 swi r11, r5, 8
91 swi r12, r5, 12
92 lwi r9, r6, 16
93 lwi r10, r6, 20
94 lwi r11, r6, 24
95 lwi r12, r6, 28
96 swi r9, r5, 16
97 swi r10, r5, 20
98 swi r11, r5, 24
99 swi r12, r5, 28
100 addi r6, r6, 32	/* s += 32 */
101 addi r4, r4, -32	/* r4 -= 32 */
102 bneid r4, a_block_aligned	/* loop while block bytes remain */
103 addi r5, r5, 32	/* d += 32 (delay slot) */
104 bri a_block_done
105
	/*
	 * Source misaligned by r9 (1..3) bytes.  r8 walks the aligned words
	 * that contain the source bytes; r11 carries the not-yet-stored bytes
	 * of the previous word, already shifted to the top of the register.
	 */
106a_block_unaligned:
107 andi r8, r6, 0xfffffffc	/* r8 = s rounded down to a word boundary */
108 add r6, r6, r4	/* s += r4 up front; the loops below advance r8, not r6 */
109 lwi r11, r8, 0	/* r11 = first aligned word */
110
111 addi r9, r9, -1
112 beqi r9, a_block_u1	/* misalignment was 1 */
113 addi r9, r9, -1
114 beqi r9, a_block_u2	/* misalignment was 2 */
115
	/* Misalignment 3: one byte of each word is carried to the next store. */
116a_block_u3:
117 bslli r11, r11, 24	/* keep the last byte of the first word, moved to the top */
118a_bu3_loop:
119 lwi r12, r8, 4	/* r12 = next aligned source word */
120 bsrli r9, r12, 8	/* its first three bytes, moved to the low 24 bits */
121 or r9, r11, r9	/* merge with the carried byte */
122 swi r9, r5, 0	/* store destination word 0 */
123 bslli r11, r12, 24	/* carry the last byte of r12 */
124 lwi r12, r8, 8	/* destination word 1 */
125 bsrli r9, r12, 8
126 or r9, r11, r9
127 swi r9, r5, 4
128 bslli r11, r12, 24
129 lwi r12, r8, 12	/* destination word 2 */
130 bsrli r9, r12, 8
131 or r9, r11, r9
132 swi r9, r5, 8
133 bslli r11, r12, 24
134 lwi r12, r8, 16	/* destination word 3 */
135 bsrli r9, r12, 8
136 or r9, r11, r9
137 swi r9, r5, 12
138 bslli r11, r12, 24
139 lwi r12, r8, 20	/* destination word 4 */
140 bsrli r9, r12, 8
141 or r9, r11, r9
142 swi r9, r5, 16
143 bslli r11, r12, 24
144 lwi r12, r8, 24	/* destination word 5 */
145 bsrli r9, r12, 8
146 or r9, r11, r9
147 swi r9, r5, 20
148 bslli r11, r12, 24
149 lwi r12, r8, 28	/* destination word 6 */
150 bsrli r9, r12, 8
151 or r9, r11, r9
152 swi r9, r5, 24
153 bslli r11, r12, 24
154 lwi r12, r8, 32	/* destination word 7 */
155 bsrli r9, r12, 8
156 or r9, r11, r9
157 swi r9, r5, 28
158 bslli r11, r12, 24	/* carry into the next iteration */
159 addi r8, r8, 32	/* aligned source pointer += 32 */
160 addi r4, r4, -32	/* r4 -= 32 */
161 bneid r4, a_bu3_loop
162 addi r5, r5, 32	/* d += 32 (delay slot) */
163 bri a_block_done
164
	/* Misalignment 1: three bytes of each word are carried to the next store. */
165a_block_u1:
166 bslli r11, r11, 8	/* drop the first byte, keep the last three at the top */
167a_bu1_loop:
168 lwi r12, r8, 4	/* r12 = next aligned source word */
169 bsrli r9, r12, 24	/* its first byte, moved to the low 8 bits */
170 or r9, r11, r9	/* merge with the three carried bytes */
171 swi r9, r5, 0	/* store destination word 0 */
172 bslli r11, r12, 8	/* carry the last three bytes of r12 */
173 lwi r12, r8, 8	/* destination word 1 */
174 bsrli r9, r12, 24
175 or r9, r11, r9
176 swi r9, r5, 4
177 bslli r11, r12, 8
178 lwi r12, r8, 12	/* destination word 2 */
179 bsrli r9, r12, 24
180 or r9, r11, r9
181 swi r9, r5, 8
182 bslli r11, r12, 8
183 lwi r12, r8, 16	/* destination word 3 */
184 bsrli r9, r12, 24
185 or r9, r11, r9
186 swi r9, r5, 12
187 bslli r11, r12, 8
188 lwi r12, r8, 20	/* destination word 4 */
189 bsrli r9, r12, 24
190 or r9, r11, r9
191 swi r9, r5, 16
192 bslli r11, r12, 8
193 lwi r12, r8, 24	/* destination word 5 */
194 bsrli r9, r12, 24
195 or r9, r11, r9
196 swi r9, r5, 20
197 bslli r11, r12, 8
198 lwi r12, r8, 28	/* destination word 6 */
199 bsrli r9, r12, 24
200 or r9, r11, r9
201 swi r9, r5, 24
202 bslli r11, r12, 8
203 lwi r12, r8, 32	/* destination word 7 */
204 bsrli r9, r12, 24
205 or r9, r11, r9
206 swi r9, r5, 28
207 bslli r11, r12, 8	/* carry into the next iteration */
208 addi r8, r8, 32	/* aligned source pointer += 32 */
209 addi r4, r4, -32	/* r4 -= 32 */
210 bneid r4, a_bu1_loop
211 addi r5, r5, 32	/* d += 32 (delay slot) */
212 bri a_block_done
213
	/* Misalignment 2: two bytes of each word are carried to the next store. */
214a_block_u2:
215 bslli r11, r11, 16	/* keep the last two bytes, moved to the top */
216a_bu2_loop:
217 lwi r12, r8, 4	/* r12 = next aligned source word */
218 bsrli r9, r12, 16	/* its first two bytes, moved to the low 16 bits */
219 or r9, r11, r9	/* merge with the two carried bytes */
220 swi r9, r5, 0	/* store destination word 0 */
221 bslli r11, r12, 16	/* carry the last two bytes of r12 */
222 lwi r12, r8, 8	/* destination word 1 */
223 bsrli r9, r12, 16
224 or r9, r11, r9
225 swi r9, r5, 4
226 bslli r11, r12, 16
227 lwi r12, r8, 12	/* destination word 2 */
228 bsrli r9, r12, 16
229 or r9, r11, r9
230 swi r9, r5, 8
231 bslli r11, r12, 16
232 lwi r12, r8, 16	/* destination word 3 */
233 bsrli r9, r12, 16
234 or r9, r11, r9
235 swi r9, r5, 12
236 bslli r11, r12, 16
237 lwi r12, r8, 20	/* destination word 4 */
238 bsrli r9, r12, 16
239 or r9, r11, r9
240 swi r9, r5, 16
241 bslli r11, r12, 16
242 lwi r12, r8, 24	/* destination word 5 */
243 bsrli r9, r12, 16
244 or r9, r11, r9
245 swi r9, r5, 20
246 bslli r11, r12, 16
247 lwi r12, r8, 28	/* destination word 6 */
248 bsrli r9, r12, 16
249 or r9, r11, r9
250 swi r9, r5, 24
251 bslli r11, r12, 16
252 lwi r12, r8, 32	/* destination word 7 */
253 bsrli r9, r12, 16
254 or r9, r11, r9
255 swi r9, r5, 28
256 bslli r11, r12, 16	/* carry into the next iteration */
257 addi r8, r8, 32	/* aligned source pointer += 32 */
258 addi r4, r4, -32	/* r4 -= 32 */
259 bneid r4, a_bu2_loop
260 addi r5, r5, 32	/* d += 32 (delay slot) */
261
	/* The u2 loop falls through to here; the other block loops branch here. */
262a_block_done:
263 addi r4, r0, 4	/* r4 = 4 */
264 cmpu r4, r4, r7	/* r4 = c - 4, negative iff c < 4 */
265 blti r4, a_xfer_end	/* no whole word left */
266
	/* Whole-word phase: r10 is a byte offset applied to both s and d. */
267a_word_xfer:
268 andi r4, r7, 0xfffffffc	/* r4 = c rounded down to a multiple of 4 */
269 addi r10, r0, 0	/* r10 = 0 */
270
271 andi r9, r6, 3	/* r9 = s & 3: source misalignment */
272
273 bnei r9, a_word_unaligned
274
275a_word_aligned:
276 lw r9, r6, r10	/* r9 = word at s + r10 */
277 sw r9, r5, r10	/* word at d + r10 = r9 */
278 addi r4, r4,-4	/* r4 -= 4 */
279 bneid r4, a_word_aligned	/* loop while word bytes remain */
280 addi r10, r10, 4	/* r10 += 4 (delay slot) */
281
282 bri a_word_done
283
	/* Same carry/merge scheme as the block phase, one word per iteration. */
284a_word_unaligned:
285 andi r8, r6, 0xfffffffc	/* r8 = s rounded down to a word boundary */
286 lwi r11, r8, 0	/* r11 = first aligned word */
287 addi r8, r8, 4	/* r8 + r10 now addresses the next aligned word */
288
289 addi r9, r9, -1
290 beqi r9, a_word_u1	/* misalignment was 1 */
291 addi r9, r9, -1
292 beqi r9, a_word_u2	/* misalignment was 2 */
293
294a_word_u3:
295 bslli r11, r11, 24	/* carry one byte */
296a_wu3_loop:
297 lw r12, r8, r10	/* r12 = next aligned source word */
298 bsrli r9, r12, 8	/* its first three bytes */
299 or r9, r11, r9	/* merge with the carried byte */
300 sw r9, r5, r10	/* store at d + r10 */
301 bslli r11, r12, 24	/* new carry */
302 addi r4, r4,-4	/* r4 -= 4 */
303 bneid r4, a_wu3_loop
304 addi r10, r10, 4	/* r10 += 4 (delay slot) */
305
306 bri a_word_done
307
308a_word_u1:
309 bslli r11, r11, 8	/* carry three bytes */
310a_wu1_loop:
311 lw r12, r8, r10	/* r12 = next aligned source word */
312 bsrli r9, r12, 24	/* its first byte */
313 or r9, r11, r9	/* merge with the carried bytes */
314 sw r9, r5, r10	/* store at d + r10 */
315 bslli r11, r12, 8	/* new carry */
316 addi r4, r4,-4	/* r4 -= 4 */
317 bneid r4, a_wu1_loop
318 addi r10, r10, 4	/* r10 += 4 (delay slot) */
319
320 bri a_word_done
321
322a_word_u2:
323 bslli r11, r11, 16	/* carry two bytes */
324a_wu2_loop:
325 lw r12, r8, r10	/* r12 = next aligned source word */
326 bsrli r9, r12, 16	/* its first two bytes */
327 or r9, r11, r9	/* merge with the carried bytes */
328 sw r9, r5, r10	/* store at d + r10 */
329 bslli r11, r12, 16	/* new carry */
330 addi r4, r4,-4	/* r4 -= 4 */
331 bneid r4, a_wu2_loop
332 addi r10, r10, 4	/* r10 += 4 (delay slot) */
333
	/* Fold the offset covered by the word phase back into d, s and c. */
334a_word_done:
335 add r5, r5, r10	/* d += r10 */
336 add r6, r6, r10	/* s += r10 */
337 rsub r7, r10, r7	/* c -= r10 */
338
	/* Trailing bytes, one at a time, until c reaches zero. */
339a_xfer_end:
340a_xfer_end_loop:
341 beqi r7, a_done	/* c == 0: finished */
342 lbui r9, r6, 0	/* r9 = *s */
343 addi r6, r6, 1	/* s++ */
344 sbi r9, r5, 0	/* *d = r9 */
345 addi r7, r7, -1	/* c-- */
346 brid a_xfer_end_loop
347 addi r5, r5, 1	/* d++ (delay slot) */
348
349a_done:	/* memmove's trailing byte loop also branches here */
350 rtsd r15, 8	/* return to r15 + 8 */
351 nop	/* delay slot */
352
353.size memcpy, . - memcpy
354.end memcpy
355
/*
 * memmove / fast_memcpy_descending
 *
 * Copies r7 bytes from the address in r6 to the address in r5 and copes
 * with overlapping buffers.
 *
 * In:   r5 = destination, r6 = source, r7 = byte count
 * Out:  r3 = original value of r5
 * Uses: r4, r8, r9, r10, r11, r12 as scratch; r5, r6, r7 are modified
 *
 * If the destination is not above the source (unsigned compare), a forward
 * copy cannot overwrite unread source bytes, so control is handed to
 * fast_memcpy_ascending, which is defined by memcpy elsewhere in this file.
 * Otherwise both pointers are moved one past the end of their buffers and
 * the copy runs downwards through memory:
 *   1. fewer than 4 bytes          -> byte loop only
 *   2. byte-copy until the destination is word aligned
 *   3. 32-byte blocks (source aligned, or misaligned by 1/2/3 bytes)
 *   4. remaining whole words (same four source-alignment cases)
 *   5. remaining 0..3 bytes, one at a time
 *
 * When the source is not word aligned, each destination word is assembled
 * from two consecutive aligned source words with barrel shifts.  The shift
 * directions assume big-endian byte order.
 *
 * Instructions ending in 'd' (brid, bneid, rtsd) have a delay slot: the
 * instruction written after them executes before the branch takes effect.
 */
	.globl	memmove
	.type	memmove, @function
	.ent	memmove

memmove:
	cmpu	r4, r5, r6		/* r4 = s - d, negative iff d > s (unsigned) */
	bgei	r4, fast_memcpy_ascending /* d <= s: forward copy is safe */

fast_memcpy_descending:
	addi	r3, r5, 0		/* r3 = d: keep the original destination */

	add	r5, r5, r7		/* d = one past the last destination byte */
	add	r6, r6, r7		/* s = one past the last source byte */

	addi	r4, r0, 4		/* r4 = 4 (r0 reads as zero) */
	cmpu	r4, r4, r7		/* r4 = c - 4, negative iff c < 4 */
	blti	r4, d_xfer_end		/* less than one word: byte loop only */

	/* Copy 0..3 trailing bytes so that the destination becomes word aligned. */
	andi	r4, r5, 3		/* r4 = d & 3: bytes above the word boundary */
	beqi	r4, d_dalign_done	/* destination already aligned */
	rsub	r7, r4, r7		/* c -= r4 */

d_xfer_first_loop:
	beqi	r4, d_dalign_done	/* alignment bytes done */
	addi	r6, r6, -1		/* --s */
	addi	r5, r5, -1		/* --d */
	lbui	r11, r6, 0		/* r11 = *s */
	sbi	r11, r5, 0		/* *d = r11 */
	brid	d_xfer_first_loop
	addi	r4, r4, -1		/* r4-- (delay slot) */

d_dalign_done:
	addi	r4, r0, 32		/* r4 = 32 */
	cmpu	r4, r4, r7		/* r4 = c - 32, negative iff c < 32 */
	blti	r4, d_block_done	/* not even one full 32-byte block */

d_block_xfer:
	andi	r4, r7, 0xffffffe0	/* r4 = c rounded down to a multiple of 32 */
	rsub	r7, r4, r7		/* c -= r4: bytes left after the block phase */

	andi	r9, r6, 3		/* r9 = s & 3: source misalignment */
	bnei	r9, d_block_unaligned

	/* Source and destination both word aligned: 8 words per iteration. */
d_block_aligned:
	addi	r6, r6, -32		/* s -= 32 */
	addi	r5, r5, -32		/* d -= 32 */
	lwi	r9, r6, 28
	lwi	r10, r6, 24
	lwi	r11, r6, 20
	lwi	r12, r6, 16
	swi	r9, r5, 28
	swi	r10, r5, 24
	swi	r11, r5, 20
	swi	r12, r5, 16
	lwi	r9, r6, 12
	lwi	r10, r6, 8
	lwi	r11, r6, 4
	lwi	r12, r6, 0
	swi	r9, r5, 12
	swi	r10, r5, 8
	swi	r11, r5, 4
	addi	r4, r4, -32		/* r4 -= 32 */
	bneid	r4, d_block_aligned	/* loop while block bytes remain */
	swi	r12, r5, 0		/* last store of the block (delay slot) */
	bri	d_block_done

	/*
	 * Source misaligned by r9 (1..3) bytes.  r8 walks the aligned words
	 * that contain the source bytes; r11 carries the not-yet-stored bytes
	 * of the previous (higher) word, already shifted to the bottom of the
	 * register.
	 */
d_block_unaligned:
	andi	r8, r6, 0xfffffffc	/* r8 = s rounded down to a word boundary */
	rsub	r6, r4, r6		/* s -= r4 up front; the loops move r8, not r6 */
	lwi	r11, r8, 0		/* r11 = aligned word holding the topmost bytes */

	addi	r9, r9, -1
	beqi	r9, d_block_u1		/* misalignment was 1 */
	addi	r9, r9, -1
	beqi	r9, d_block_u2		/* misalignment was 2 */

	/* Misalignment 3: three bytes of each word are carried to the next store. */
d_block_u3:
	bsrli	r11, r11, 8		/* keep the first three bytes, moved to the bottom */
d_bu3_loop:
	addi	r8, r8, -32		/* aligned source pointer -= 32 */
	addi	r5, r5, -32		/* d -= 32 */
	lwi	r12, r8, 28		/* r12 = next lower aligned source word */
	bslli	r9, r12, 24		/* its last byte, moved to the top */
	or	r9, r11, r9		/* merge with the three carried bytes */
	swi	r9, r5, 28		/* store destination word 7 */
	bsrli	r11, r12, 8		/* carry the first three bytes of r12 */
	lwi	r12, r8, 24		/* destination word 6 */
	bslli	r9, r12, 24
	or	r9, r11, r9
	swi	r9, r5, 24
	bsrli	r11, r12, 8
	lwi	r12, r8, 20		/* destination word 5 */
	bslli	r9, r12, 24
	or	r9, r11, r9
	swi	r9, r5, 20
	bsrli	r11, r12, 8
	lwi	r12, r8, 16		/* destination word 4 */
	bslli	r9, r12, 24
	or	r9, r11, r9
	swi	r9, r5, 16
	bsrli	r11, r12, 8
	lwi	r12, r8, 12		/* destination word 3 */
	bslli	r9, r12, 24
	or	r9, r11, r9
	swi	r9, r5, 12
	bsrli	r11, r12, 8
	lwi	r12, r8, 8		/* destination word 2 */
	bslli	r9, r12, 24
	or	r9, r11, r9
	swi	r9, r5, 8
	bsrli	r11, r12, 8
	lwi	r12, r8, 4		/* destination word 1 */
	bslli	r9, r12, 24
	or	r9, r11, r9
	swi	r9, r5, 4
	bsrli	r11, r12, 8
	lwi	r12, r8, 0		/* destination word 0 */
	bslli	r9, r12, 24
	or	r9, r11, r9
	swi	r9, r5, 0
	addi	r4, r4, -32		/* r4 -= 32 */
	bneid	r4, d_bu3_loop
	bsrli	r11, r12, 8		/* carry into the next iteration (delay slot) */
	bri	d_block_done

	/* Misalignment 1: one byte of each word is carried to the next store. */
d_block_u1:
	bsrli	r11, r11, 24		/* keep the first byte, moved to the bottom */
d_bu1_loop:
	addi	r8, r8, -32		/* aligned source pointer -= 32 */
	addi	r5, r5, -32		/* d -= 32 */
	lwi	r12, r8, 28		/* r12 = next lower aligned source word */
	bslli	r9, r12, 8		/* its last three bytes, moved to the top */
	or	r9, r11, r9		/* merge with the carried byte */
	swi	r9, r5, 28		/* store destination word 7 */
	bsrli	r11, r12, 24		/* carry the first byte of r12 */
	lwi	r12, r8, 24		/* destination word 6 */
	bslli	r9, r12, 8
	or	r9, r11, r9
	swi	r9, r5, 24
	bsrli	r11, r12, 24
	lwi	r12, r8, 20		/* destination word 5 */
	bslli	r9, r12, 8
	or	r9, r11, r9
	swi	r9, r5, 20
	bsrli	r11, r12, 24
	lwi	r12, r8, 16		/* destination word 4 */
	bslli	r9, r12, 8
	or	r9, r11, r9
	swi	r9, r5, 16
	bsrli	r11, r12, 24
	lwi	r12, r8, 12		/* destination word 3 */
	bslli	r9, r12, 8
	or	r9, r11, r9
	swi	r9, r5, 12
	bsrli	r11, r12, 24
	lwi	r12, r8, 8		/* destination word 2 */
	bslli	r9, r12, 8
	or	r9, r11, r9
	swi	r9, r5, 8
	bsrli	r11, r12, 24
	lwi	r12, r8, 4		/* destination word 1 */
	bslli	r9, r12, 8
	or	r9, r11, r9
	swi	r9, r5, 4
	bsrli	r11, r12, 24
	lwi	r12, r8, 0		/* destination word 0 */
	bslli	r9, r12, 8
	or	r9, r11, r9
	swi	r9, r5, 0
	addi	r4, r4, -32		/* r4 -= 32 */
	bneid	r4, d_bu1_loop
	bsrli	r11, r12, 24		/* carry into the next iteration (delay slot) */
	bri	d_block_done

	/* Misalignment 2: two bytes of each word are carried to the next store. */
d_block_u2:
	bsrli	r11, r11, 16		/* keep the first two bytes, moved to the bottom */
d_bu2_loop:
	addi	r8, r8, -32		/* aligned source pointer -= 32 */
	addi	r5, r5, -32		/* d -= 32 */
	lwi	r12, r8, 28		/* r12 = next lower aligned source word */
	bslli	r9, r12, 16		/* its last two bytes, moved to the top */
	or	r9, r11, r9		/* merge with the two carried bytes */
	swi	r9, r5, 28		/* store destination word 7 */
	bsrli	r11, r12, 16		/* carry the first two bytes of r12 */
	lwi	r12, r8, 24		/* destination word 6 */
	bslli	r9, r12, 16
	or	r9, r11, r9
	swi	r9, r5, 24
	bsrli	r11, r12, 16
	lwi	r12, r8, 20		/* destination word 5 */
	bslli	r9, r12, 16
	or	r9, r11, r9
	swi	r9, r5, 20
	bsrli	r11, r12, 16
	lwi	r12, r8, 16		/* destination word 4 */
	bslli	r9, r12, 16
	or	r9, r11, r9
	swi	r9, r5, 16
	bsrli	r11, r12, 16
	lwi	r12, r8, 12		/* destination word 3 */
	bslli	r9, r12, 16
	or	r9, r11, r9
	swi	r9, r5, 12
	bsrli	r11, r12, 16
	lwi	r12, r8, 8		/* destination word 2 */
	bslli	r9, r12, 16
	or	r9, r11, r9
	swi	r9, r5, 8
	bsrli	r11, r12, 16
	lwi	r12, r8, 4		/* destination word 1 */
	bslli	r9, r12, 16
	or	r9, r11, r9
	swi	r9, r5, 4
	bsrli	r11, r12, 16
	lwi	r12, r8, 0		/* destination word 0 */
	bslli	r9, r12, 16
	or	r9, r11, r9
	swi	r9, r5, 0
	addi	r4, r4, -32		/* r4 -= 32 */
	bneid	r4, d_bu2_loop
	bsrli	r11, r12, 16		/* carry into the next iteration (delay slot) */

	/* The u2 loop falls through to here; the other block loops branch here. */
d_block_done:
	addi	r4, r0, 4		/* r4 = 4 */
	cmpu	r4, r4, r7		/* r4 = c - 4, negative iff c < 4 */
	blti	r4, d_xfer_end		/* no whole word left */

	/*
	 * Whole-word phase.  d, s and c are moved down by the whole word count
	 * first; r4 then serves as a byte offset that counts down to zero.
	 */
d_word_xfer:
	andi	r4, r7, 0xfffffffc	/* r4 = c rounded down to a multiple of 4 */
	rsub	r5, r4, r5		/* d -= r4 */
	rsub	r6, r4, r6		/* s -= r4 */
	rsub	r7, r4, r7		/* c -= r4 */

	andi	r9, r6, 3		/* r9 = s & 3: source misalignment */
	bnei	r9, d_word_unaligned

d_word_aligned:
	addi	r4, r4,-4		/* r4 -= 4 */
	lw	r9, r6, r4		/* r9 = word at s + r4 */
	bneid	r4, d_word_aligned	/* loop until the offset reaches 0 */
	sw	r9, r5, r4		/* word at d + r4 = r9 (delay slot) */

	bri	d_word_done

	/* Same carry/merge scheme as the block phase, one word per iteration. */
d_word_unaligned:
	andi	r8, r6, 0xfffffffc	/* r8 = s rounded down to a word boundary */
	lw	r11, r8, r4		/* r11 = aligned word holding the topmost bytes */

	addi	r9, r9, -1
	beqi	r9, d_word_u1		/* misalignment was 1 */
	addi	r9, r9, -1
	beqi	r9, d_word_u2		/* misalignment was 2 */

d_word_u3:
	bsrli	r11, r11, 8		/* carry three bytes */
d_wu3_loop:
	addi	r4, r4,-4		/* r4 -= 4 */
	lw	r12, r8, r4		/* r12 = next lower aligned source word */
	bslli	r9, r12, 24		/* its last byte */
	or	r9, r11, r9		/* merge with the carried bytes */
	sw	r9, r5, r4		/* store at d + r4 */
	bneid	r4, d_wu3_loop
	bsrli	r11, r12, 8		/* new carry (delay slot) */

	bri	d_word_done

d_word_u1:
	bsrli	r11, r11, 24		/* carry one byte */
d_wu1_loop:
	addi	r4, r4,-4		/* r4 -= 4 */
	lw	r12, r8, r4		/* r12 = next lower aligned source word */
	bslli	r9, r12, 8		/* its last three bytes */
	or	r9, r11, r9		/* merge with the carried byte */
	sw	r9, r5, r4		/* store at d + r4 */
	bneid	r4, d_wu1_loop
	bsrli	r11, r12, 24		/* new carry (delay slot) */

	bri	d_word_done

d_word_u2:
	bsrli	r11, r11, 16		/* carry two bytes */
d_wu2_loop:
	addi	r4, r4,-4		/* r4 -= 4 */
	lw	r12, r8, r4		/* r12 = next lower aligned source word */
	bslli	r9, r12, 16		/* its last two bytes */
	or	r9, r11, r9		/* merge with the carried bytes */
	sw	r9, r5, r4		/* store at d + r4 */
	bneid	r4, d_wu2_loop
	bsrli	r11, r12, 16		/* new carry (delay slot) */

d_word_done:

	/* Remaining bytes, one at a time, until c reaches zero. */
d_xfer_end:
d_xfer_end_loop:
	beqi	r7, d_done		/* c == 0: leave through this function's own epilogue */
	addi	r6, r6, -1		/* --s */
	lbui	r9, r6, 0		/* r9 = *s */
	addi	r5, r5, -1		/* --d */
	sbi	r9, r5, 0		/* *d = r9 */
	brid	d_xfer_end_loop
	addi	r7, r7, -1		/* c-- (delay slot) */

d_done:
	rtsd	r15, 8			/* return to r15 + 8 */
	nop				/* delay slot */

.size memmove, . - memmove
.end memmove
671