linux/arch/x86/crypto/salsa20-x86_64-asm_64.S
# enter ECRYPT_encrypt_bytes
.text
.p2align 5
.globl ECRYPT_encrypt_bytes
ECRYPT_encrypt_bytes:
        mov     %rsp,%r11
        and     $31,%r11
        add     $256,%r11
        sub     %r11,%rsp
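        # Note: the four instructions above align %rsp down to a 32-byte
        # boundary while reserving at least 256 bytes of scratch space; the
        # size of the adjustment is kept in %r11 (and spilled to r11_stack
        # below) so the final "add %r11,%rsp" restores the caller's stack.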
        # x = arg1
        mov     %rdi,%r8
        # m = arg2
        mov     %rsi,%rsi
        # out = arg3
        mov     %rdx,%rdi
        # bytes = arg4
        mov     %rcx,%rdx
        #               unsigned>? bytes - 0
        cmp     $0,%rdx
        # comment:fp stack unchanged by jump
        # goto done if !unsigned>
        jbe     ._done
        # comment:fp stack unchanged by fallthrough
# start:
._start:
        # r11_stack = r11
        movq    %r11,0(%rsp)
        # r12_stack = r12
        movq    %r12,8(%rsp)
        # r13_stack = r13
        movq    %r13,16(%rsp)
        # r14_stack = r14
        movq    %r14,24(%rsp)
        # r15_stack = r15
        movq    %r15,32(%rsp)
        # rbx_stack = rbx
        movq    %rbx,40(%rsp)
        # rbp_stack = rbp
        movq    %rbp,48(%rsp)
        # in0 = *(uint64 *) (x + 0)
        movq    0(%r8),%rcx
        # in2 = *(uint64 *) (x + 8)
        movq    8(%r8),%r9
        # in4 = *(uint64 *) (x + 16)
        movq    16(%r8),%rax
        # in6 = *(uint64 *) (x + 24)
        movq    24(%r8),%r10
        # in8 = *(uint64 *) (x + 32)
        movq    32(%r8),%r11
        # in10 = *(uint64 *) (x + 40)
        movq    40(%r8),%r12
        # in12 = *(uint64 *) (x + 48)
        movq    48(%r8),%r13
        # in14 = *(uint64 *) (x + 56)
        movq    56(%r8),%r14
        # j0 = in0
        movq    %rcx,56(%rsp)
        # j2 = in2
        movq    %r9,64(%rsp)
        # j4 = in4
        movq    %rax,72(%rsp)
        # j6 = in6
        movq    %r10,80(%rsp)
        # j8 = in8
        movq    %r11,88(%rsp)
        # j10 = in10
        movq    %r12,96(%rsp)
        # j12 = in12
        movq    %r13,104(%rsp)
        # j14 = in14
        movq    %r14,112(%rsp)
        # x_backup = x
        movq    %r8,120(%rsp)
# bytesatleast1:
._bytesatleast1:
        #                   unsigned<? bytes - 64
        cmp     $64,%rdx
        # comment:fp stack unchanged by jump
        #   goto nocopy if !unsigned<
        jae     ._nocopy
        #     ctarget = out
        movq    %rdi,128(%rsp)
        #     out = &tmp
        leaq    192(%rsp),%rdi
        #     i = bytes
        mov     %rdx,%rcx
        #     while (i) { *out++ = *m++; --i }
        rep     movsb
        #     out = &tmp
        leaq    192(%rsp),%rdi
        #     m = &tmp
        leaq    192(%rsp),%rsi
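        #     (partial final block: the remaining bytes were copied into the
        #     64-byte tmp area at 192(%rsp), and both m and out now point at
        #     tmp so a full 64-byte block can be processed; the caller's
        #     output pointer is kept in ctarget and the partial result is
        #     copied back just before ._bytesatleast64)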
        # comment:fp stack unchanged by fallthrough
#   nocopy:
._nocopy:
        #   out_backup = out
        movq    %rdi,136(%rsp)
        #   m_backup = m
        movq    %rsi,144(%rsp)
        #   bytes_backup = bytes
        movq    %rdx,152(%rsp)
        #   x1 = j0
        movq    56(%rsp),%rdi
        #   x0 = x1
        mov     %rdi,%rdx
        #   (uint64) x1 >>= 32
        shr     $32,%rdi
        #               x3 = j2
        movq    64(%rsp),%rsi
        #               x2 = x3
        mov     %rsi,%rcx
        #               (uint64) x3 >>= 32
        shr     $32,%rsi
        #   x5 = j4
        movq    72(%rsp),%r8
        #   x4 = x5
        mov     %r8,%r9
        #   (uint64) x5 >>= 32
        shr     $32,%r8
        #   x5_stack = x5
        movq    %r8,160(%rsp)
        #               x7 = j6
        movq    80(%rsp),%r8
        #               x6 = x7
        mov     %r8,%rax
        #               (uint64) x7 >>= 32
        shr     $32,%r8
        #   x9 = j8
        movq    88(%rsp),%r10
        #   x8 = x9
        mov     %r10,%r11
        #   (uint64) x9 >>= 32
        shr     $32,%r10
        #               x11 = j10
        movq    96(%rsp),%r12
        #               x10 = x11
        mov     %r12,%r13
        #               x10_stack = x10
        movq    %r13,168(%rsp)
        #               (uint64) x11 >>= 32
        shr     $32,%r12
        #   x13 = j12
        movq    104(%rsp),%r13
        #   x12 = x13
        mov     %r13,%r14
        #   (uint64) x13 >>= 32
        shr     $32,%r13
        #               x15 = j14
        movq    112(%rsp),%r15
        #               x14 = x15
        mov     %r15,%rbx
        #               (uint64) x15 >>= 32
        shr     $32,%r15
        #               x15_stack = x15
        movq    %r15,176(%rsp)
        #   i = 20
        mov     $20,%r15
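        #   (each lea/rol/xor triple below computes one Salsa20 quarter-round
        #   step, t = xi + xj followed by xk ^= (t <<< r), with the rotate
        #   done on the 32-bit low half of the scratch register.  i counts
        #   down from 20 in steps of 4, so five trips through ._mainloop
        #   perform all 20 rounds.)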
#   mainloop:
._mainloop:
        #   i_backup = i
        movq    %r15,184(%rsp)
        #               x5 = x5_stack
        movq    160(%rsp),%r15
        # a = x12 + x0
        lea     (%r14,%rdx),%rbp
        # (uint32) a <<<= 7
        rol     $7,%ebp
        # x4 ^= a
        xor     %rbp,%r9
        #               b = x1 + x5
        lea     (%rdi,%r15),%rbp
        #               (uint32) b <<<= 7
        rol     $7,%ebp
        #               x9 ^= b
        xor     %rbp,%r10
        # a = x0 + x4
        lea     (%rdx,%r9),%rbp
        # (uint32) a <<<= 9
        rol     $9,%ebp
        # x8 ^= a
        xor     %rbp,%r11
        #               b = x5 + x9
        lea     (%r15,%r10),%rbp
        #               (uint32) b <<<= 9
        rol     $9,%ebp
        #               x13 ^= b
        xor     %rbp,%r13
        # a = x4 + x8
        lea     (%r9,%r11),%rbp
        # (uint32) a <<<= 13
        rol     $13,%ebp
        # x12 ^= a
        xor     %rbp,%r14
        #               b = x9 + x13
        lea     (%r10,%r13),%rbp
        #               (uint32) b <<<= 13
        rol     $13,%ebp
        #               x1 ^= b
        xor     %rbp,%rdi
        # a = x8 + x12
        lea     (%r11,%r14),%rbp
        # (uint32) a <<<= 18
        rol     $18,%ebp
        # x0 ^= a
        xor     %rbp,%rdx
        #               b = x13 + x1
        lea     (%r13,%rdi),%rbp
        #               (uint32) b <<<= 18
        rol     $18,%ebp
        #               x5 ^= b
        xor     %rbp,%r15
        #                               x10 = x10_stack
        movq    168(%rsp),%rbp
        #               x5_stack = x5
        movq    %r15,160(%rsp)
        #                               c = x6 + x10
        lea     (%rax,%rbp),%r15
        #                               (uint32) c <<<= 7
        rol     $7,%r15d
        #                               x14 ^= c
        xor     %r15,%rbx
        #                               c = x10 + x14
        lea     (%rbp,%rbx),%r15
        #                               (uint32) c <<<= 9
        rol     $9,%r15d
        #                               x2 ^= c
        xor     %r15,%rcx
        #                               c = x14 + x2
        lea     (%rbx,%rcx),%r15
        #                               (uint32) c <<<= 13
        rol     $13,%r15d
        #                               x6 ^= c
        xor     %r15,%rax
        #                               c = x2 + x6
        lea     (%rcx,%rax),%r15
        #                               (uint32) c <<<= 18
        rol     $18,%r15d
        #                               x10 ^= c
        xor     %r15,%rbp
        #                                               x15 = x15_stack
        movq    176(%rsp),%r15
        #                               x10_stack = x10
        movq    %rbp,168(%rsp)
        #                                               d = x11 + x15
        lea     (%r12,%r15),%rbp
        #                                               (uint32) d <<<= 7
        rol     $7,%ebp
        #                                               x3 ^= d
        xor     %rbp,%rsi
        #                                               d = x15 + x3
        lea     (%r15,%rsi),%rbp
        #                                               (uint32) d <<<= 9
        rol     $9,%ebp
        #                                               x7 ^= d
        xor     %rbp,%r8
        #                                               d = x3 + x7
        lea     (%rsi,%r8),%rbp
        #                                               (uint32) d <<<= 13
        rol     $13,%ebp
        #                                               x11 ^= d
        xor     %rbp,%r12
        #                                               d = x7 + x11
        lea     (%r8,%r12),%rbp
        #                                               (uint32) d <<<= 18
        rol     $18,%ebp
        #                                               x15 ^= d
        xor     %rbp,%r15
        #                                               x15_stack = x15
        movq    %r15,176(%rsp)
        #               x5 = x5_stack
        movq    160(%rsp),%r15
        # a = x3 + x0
        lea     (%rsi,%rdx),%rbp
        # (uint32) a <<<= 7
        rol     $7,%ebp
        # x1 ^= a
        xor     %rbp,%rdi
        #               b = x4 + x5
        lea     (%r9,%r15),%rbp
        #               (uint32) b <<<= 7
        rol     $7,%ebp
        #               x6 ^= b
        xor     %rbp,%rax
        # a = x0 + x1
        lea     (%rdx,%rdi),%rbp
        # (uint32) a <<<= 9
        rol     $9,%ebp
        # x2 ^= a
        xor     %rbp,%rcx
        #               b = x5 + x6
        lea     (%r15,%rax),%rbp
        #               (uint32) b <<<= 9
        rol     $9,%ebp
        #               x7 ^= b
        xor     %rbp,%r8
        # a = x1 + x2
        lea     (%rdi,%rcx),%rbp
        # (uint32) a <<<= 13
        rol     $13,%ebp
        # x3 ^= a
        xor     %rbp,%rsi
        #               b = x6 + x7
        lea     (%rax,%r8),%rbp
        #               (uint32) b <<<= 13
        rol     $13,%ebp
        #               x4 ^= b
        xor     %rbp,%r9
        # a = x2 + x3
        lea     (%rcx,%rsi),%rbp
        # (uint32) a <<<= 18
        rol     $18,%ebp
        # x0 ^= a
        xor     %rbp,%rdx
        #               b = x7 + x4
        lea     (%r8,%r9),%rbp
        #               (uint32) b <<<= 18
        rol     $18,%ebp
        #               x5 ^= b
        xor     %rbp,%r15
        #                               x10 = x10_stack
        movq    168(%rsp),%rbp
        #               x5_stack = x5
        movq    %r15,160(%rsp)
        #                               c = x9 + x10
        lea     (%r10,%rbp),%r15
        #                               (uint32) c <<<= 7
        rol     $7,%r15d
        #                               x11 ^= c
        xor     %r15,%r12
        #                               c = x10 + x11
        lea     (%rbp,%r12),%r15
        #                               (uint32) c <<<= 9
        rol     $9,%r15d
        #                               x8 ^= c
        xor     %r15,%r11
        #                               c = x11 + x8
        lea     (%r12,%r11),%r15
        #                               (uint32) c <<<= 13
        rol     $13,%r15d
        #                               x9 ^= c
        xor     %r15,%r10
        #                               c = x8 + x9
        lea     (%r11,%r10),%r15
        #                               (uint32) c <<<= 18
        rol     $18,%r15d
        #                               x10 ^= c
        xor     %r15,%rbp
        #                                               x15 = x15_stack
        movq    176(%rsp),%r15
        #                               x10_stack = x10
        movq    %rbp,168(%rsp)
        #                                               d = x14 + x15
        lea     (%rbx,%r15),%rbp
        #                                               (uint32) d <<<= 7
        rol     $7,%ebp
        #                                               x12 ^= d
        xor     %rbp,%r14
        #                                               d = x15 + x12
        lea     (%r15,%r14),%rbp
        #                                               (uint32) d <<<= 9
        rol     $9,%ebp
        #                                               x13 ^= d
        xor     %rbp,%r13
        #                                               d = x12 + x13
        lea     (%r14,%r13),%rbp
        #                                               (uint32) d <<<= 13
        rol     $13,%ebp
        #                                               x14 ^= d
        xor     %rbp,%rbx
        #                                               d = x13 + x14
        lea     (%r13,%rbx),%rbp
        #                                               (uint32) d <<<= 18
        rol     $18,%ebp
        #                                               x15 ^= d
        xor     %rbp,%r15
        #                                               x15_stack = x15
        movq    %r15,176(%rsp)
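        #   (that completes one double-round: a column round followed by a
        #   row round; the loop body repeats the same double-round once more
        #   before the counter check)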
        #               x5 = x5_stack
        movq    160(%rsp),%r15
        # a = x12 + x0
        lea     (%r14,%rdx),%rbp
        # (uint32) a <<<= 7
        rol     $7,%ebp
        # x4 ^= a
        xor     %rbp,%r9
        #               b = x1 + x5
        lea     (%rdi,%r15),%rbp
        #               (uint32) b <<<= 7
        rol     $7,%ebp
        #               x9 ^= b
        xor     %rbp,%r10
        # a = x0 + x4
        lea     (%rdx,%r9),%rbp
        # (uint32) a <<<= 9
        rol     $9,%ebp
        # x8 ^= a
        xor     %rbp,%r11
        #               b = x5 + x9
        lea     (%r15,%r10),%rbp
        #               (uint32) b <<<= 9
        rol     $9,%ebp
        #               x13 ^= b
        xor     %rbp,%r13
        # a = x4 + x8
        lea     (%r9,%r11),%rbp
        # (uint32) a <<<= 13
        rol     $13,%ebp
        # x12 ^= a
        xor     %rbp,%r14
        #               b = x9 + x13
        lea     (%r10,%r13),%rbp
        #               (uint32) b <<<= 13
        rol     $13,%ebp
        #               x1 ^= b
        xor     %rbp,%rdi
        # a = x8 + x12
        lea     (%r11,%r14),%rbp
        # (uint32) a <<<= 18
        rol     $18,%ebp
        # x0 ^= a
        xor     %rbp,%rdx
        #               b = x13 + x1
        lea     (%r13,%rdi),%rbp
        #               (uint32) b <<<= 18
        rol     $18,%ebp
        #               x5 ^= b
        xor     %rbp,%r15
        #                               x10 = x10_stack
        movq    168(%rsp),%rbp
        #               x5_stack = x5
        movq    %r15,160(%rsp)
        #                               c = x6 + x10
        lea     (%rax,%rbp),%r15
        #                               (uint32) c <<<= 7
        rol     $7,%r15d
        #                               x14 ^= c
        xor     %r15,%rbx
        #                               c = x10 + x14
        lea     (%rbp,%rbx),%r15
        #                               (uint32) c <<<= 9
        rol     $9,%r15d
        #                               x2 ^= c
        xor     %r15,%rcx
        #                               c = x14 + x2
        lea     (%rbx,%rcx),%r15
        #                               (uint32) c <<<= 13
        rol     $13,%r15d
        #                               x6 ^= c
        xor     %r15,%rax
        #                               c = x2 + x6
        lea     (%rcx,%rax),%r15
        #                               (uint32) c <<<= 18
        rol     $18,%r15d
        #                               x10 ^= c
        xor     %r15,%rbp
        #                                               x15 = x15_stack
        movq    176(%rsp),%r15
        #                               x10_stack = x10
        movq    %rbp,168(%rsp)
        #                                               d = x11 + x15
        lea     (%r12,%r15),%rbp
        #                                               (uint32) d <<<= 7
        rol     $7,%ebp
        #                                               x3 ^= d
        xor     %rbp,%rsi
        #                                               d = x15 + x3
        lea     (%r15,%rsi),%rbp
        #                                               (uint32) d <<<= 9
        rol     $9,%ebp
        #                                               x7 ^= d
        xor     %rbp,%r8
        #                                               d = x3 + x7
        lea     (%rsi,%r8),%rbp
        #                                               (uint32) d <<<= 13
        rol     $13,%ebp
        #                                               x11 ^= d
        xor     %rbp,%r12
        #                                               d = x7 + x11
        lea     (%r8,%r12),%rbp
        #                                               (uint32) d <<<= 18
        rol     $18,%ebp
        #                                               x15 ^= d
        xor     %rbp,%r15
        #                                               x15_stack = x15
        movq    %r15,176(%rsp)
        #               x5 = x5_stack
        movq    160(%rsp),%r15
        # a = x3 + x0
        lea     (%rsi,%rdx),%rbp
        # (uint32) a <<<= 7
        rol     $7,%ebp
        # x1 ^= a
        xor     %rbp,%rdi
        #               b = x4 + x5
        lea     (%r9,%r15),%rbp
        #               (uint32) b <<<= 7
        rol     $7,%ebp
        #               x6 ^= b
        xor     %rbp,%rax
        # a = x0 + x1
        lea     (%rdx,%rdi),%rbp
        # (uint32) a <<<= 9
        rol     $9,%ebp
        # x2 ^= a
        xor     %rbp,%rcx
        #               b = x5 + x6
        lea     (%r15,%rax),%rbp
        #               (uint32) b <<<= 9
        rol     $9,%ebp
        #               x7 ^= b
        xor     %rbp,%r8
        # a = x1 + x2
        lea     (%rdi,%rcx),%rbp
        # (uint32) a <<<= 13
        rol     $13,%ebp
        # x3 ^= a
        xor     %rbp,%rsi
        #               b = x6 + x7
        lea     (%rax,%r8),%rbp
        #               (uint32) b <<<= 13
        rol     $13,%ebp
        #               x4 ^= b
        xor     %rbp,%r9
        # a = x2 + x3
        lea     (%rcx,%rsi),%rbp
        # (uint32) a <<<= 18
        rol     $18,%ebp
        # x0 ^= a
        xor     %rbp,%rdx
        #               b = x7 + x4
        lea     (%r8,%r9),%rbp
        #               (uint32) b <<<= 18
        rol     $18,%ebp
        #               x5 ^= b
        xor     %rbp,%r15
        #                               x10 = x10_stack
        movq    168(%rsp),%rbp
        #               x5_stack = x5
        movq    %r15,160(%rsp)
        #                               c = x9 + x10
        lea     (%r10,%rbp),%r15
        #                               (uint32) c <<<= 7
        rol     $7,%r15d
        #                               x11 ^= c
        xor     %r15,%r12
        #                               c = x10 + x11
        lea     (%rbp,%r12),%r15
        #                               (uint32) c <<<= 9
        rol     $9,%r15d
        #                               x8 ^= c
        xor     %r15,%r11
        #                               c = x11 + x8
        lea     (%r12,%r11),%r15
        #                               (uint32) c <<<= 13
        rol     $13,%r15d
        #                               x9 ^= c
        xor     %r15,%r10
        #                               c = x8 + x9
        lea     (%r11,%r10),%r15
        #                               (uint32) c <<<= 18
        rol     $18,%r15d
        #                               x10 ^= c
        xor     %r15,%rbp
        #                                               x15 = x15_stack
        movq    176(%rsp),%r15
        #                               x10_stack = x10
        movq    %rbp,168(%rsp)
        #                                               d = x14 + x15
        lea     (%rbx,%r15),%rbp
        #                                               (uint32) d <<<= 7
        rol     $7,%ebp
        #                                               x12 ^= d
        xor     %rbp,%r14
        #                                               d = x15 + x12
        lea     (%r15,%r14),%rbp
        #                                               (uint32) d <<<= 9
        rol     $9,%ebp
        #                                               x13 ^= d
        xor     %rbp,%r13
        #                                               d = x12 + x13
        lea     (%r14,%r13),%rbp
        #                                               (uint32) d <<<= 13
        rol     $13,%ebp
        #                                               x14 ^= d
        xor     %rbp,%rbx
        #                                               d = x13 + x14
        lea     (%r13,%rbx),%rbp
        #                                               (uint32) d <<<= 18
        rol     $18,%ebp
        #                                               x15 ^= d
        xor     %rbp,%r15
        #                                               x15_stack = x15
        movq    %r15,176(%rsp)
        #   i = i_backup
        movq    184(%rsp),%r15
        #                  unsigned>? i -= 4
        sub     $4,%r15
        # comment:fp stack unchanged by jump
        # goto mainloop if unsigned>
        ja      ._mainloop
        #   (uint32) x2 += j2
        addl    64(%rsp),%ecx
        #   x3 <<= 32
        shl     $32,%rsi
        #   x3 += j2
        addq    64(%rsp),%rsi
        #   (uint64) x3 >>= 32
        shr     $32,%rsi
        #   x3 <<= 32
        shl     $32,%rsi
        #   x2 += x3
        add     %rsi,%rcx
        #   (uint32) x6 += j6
        addl    80(%rsp),%eax
        #   x7 <<= 32
        shl     $32,%r8
        #   x7 += j6
        addq    80(%rsp),%r8
        #   (uint64) x7 >>= 32
        shr     $32,%r8
        #   x7 <<= 32
        shl     $32,%r8
        #   x6 += x7
        add     %r8,%rax
        #   (uint32) x8 += j8
        addl    88(%rsp),%r11d
        #   x9 <<= 32
        shl     $32,%r10
        #   x9 += j8
        addq    88(%rsp),%r10
        #   (uint64) x9 >>= 32
        shr     $32,%r10
        #   x9 <<= 32
        shl     $32,%r10
        #   x8 += x9
        add     %r10,%r11
        #   (uint32) x12 += j12
        addl    104(%rsp),%r14d
        #   x13 <<= 32
        shl     $32,%r13
        #   x13 += j12
        addq    104(%rsp),%r13
        #   (uint64) x13 >>= 32
        shr     $32,%r13
        #   x13 <<= 32
        shl     $32,%r13
        #   x12 += x13
        add     %r13,%r14
        #   (uint32) x0 += j0
        addl    56(%rsp),%edx
        #   x1 <<= 32
        shl     $32,%rdi
        #   x1 += j0
        addq    56(%rsp),%rdi
        #   (uint64) x1 >>= 32
        shr     $32,%rdi
        #   x1 <<= 32
        shl     $32,%rdi
        #   x0 += x1
        add     %rdi,%rdx
        #   x5 = x5_stack
        movq    160(%rsp),%rdi
        #   (uint32) x4 += j4
        addl    72(%rsp),%r9d
        #   x5 <<= 32
        shl     $32,%rdi
        #   x5 += j4
        addq    72(%rsp),%rdi
        #   (uint64) x5 >>= 32
        shr     $32,%rdi
        #   x5 <<= 32
        shl     $32,%rdi
        #   x4 += x5
        add     %rdi,%r9
        #   x10 = x10_stack
        movq    168(%rsp),%r8
        #   (uint32) x10 += j10
        addl    96(%rsp),%r8d
        #   x11 <<= 32
        shl     $32,%r12
        #   x11 += j10
        addq    96(%rsp),%r12
        #   (uint64) x11 >>= 32
        shr     $32,%r12
        #   x11 <<= 32
        shl     $32,%r12
        #   x10 += x11
        add     %r12,%r8
        #   x15 = x15_stack
        movq    176(%rsp),%rdi
        #   (uint32) x14 += j14
        addl    112(%rsp),%ebx
        #   x15 <<= 32
        shl     $32,%rdi
        #   x15 += j14
        addq    112(%rsp),%rdi
        #   (uint64) x15 >>= 32
        shr     $32,%rdi
        #   x15 <<= 32
        shl     $32,%rdi
        #   x14 += x15
        add     %rdi,%rbx
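        #   (this is the Salsa20 feedforward: the original input words j0..j15
        #   are added back into x0..x15.  Each register packs two 32-bit state
        #   words; the even word is added with addl, and the shl/addq/shr/shl
        #   sequence adds the odd word in the high half without letting the
        #   two 32-bit sums carry into each other.)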
        #   out = out_backup
        movq    136(%rsp),%rdi
        #   m = m_backup
        movq    144(%rsp),%rsi
        #   x0 ^= *(uint64 *) (m + 0)
        xorq    0(%rsi),%rdx
        #   *(uint64 *) (out + 0) = x0
        movq    %rdx,0(%rdi)
        #   x2 ^= *(uint64 *) (m + 8)
        xorq    8(%rsi),%rcx
        #   *(uint64 *) (out + 8) = x2
        movq    %rcx,8(%rdi)
        #   x4 ^= *(uint64 *) (m + 16)
        xorq    16(%rsi),%r9
        #   *(uint64 *) (out + 16) = x4
        movq    %r9,16(%rdi)
        #   x6 ^= *(uint64 *) (m + 24)
        xorq    24(%rsi),%rax
        #   *(uint64 *) (out + 24) = x6
        movq    %rax,24(%rdi)
        #   x8 ^= *(uint64 *) (m + 32)
        xorq    32(%rsi),%r11
        #   *(uint64 *) (out + 32) = x8
        movq    %r11,32(%rdi)
        #   x10 ^= *(uint64 *) (m + 40)
        xorq    40(%rsi),%r8
        #   *(uint64 *) (out + 40) = x10
        movq    %r8,40(%rdi)
        #   x12 ^= *(uint64 *) (m + 48)
        xorq    48(%rsi),%r14
        #   *(uint64 *) (out + 48) = x12
        movq    %r14,48(%rdi)
        #   x14 ^= *(uint64 *) (m + 56)
        xorq    56(%rsi),%rbx
        #   *(uint64 *) (out + 56) = x14
        movq    %rbx,56(%rdi)
        #   bytes = bytes_backup
        movq    152(%rsp),%rdx
        #   in8 = j8
        movq    88(%rsp),%rcx
        #   in8 += 1
        add     $1,%rcx
        #   j8 = in8
        movq    %rcx,88(%rsp)
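        #   (j8 packs state words 8 and 9 into one qword: the 64-bit block
        #   counter, incremented once per 64-byte block)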
        #                          unsigned>? unsigned<? bytes - 64
        cmp     $64,%rdx
        # comment:fp stack unchanged by jump
        #   goto bytesatleast65 if unsigned>
        ja      ._bytesatleast65
        # comment:fp stack unchanged by jump
        #     goto bytesatleast64 if !unsigned<
        jae     ._bytesatleast64
        #       m = out
        mov     %rdi,%rsi
        #       out = ctarget
        movq    128(%rsp),%rdi
        #       i = bytes
        mov     %rdx,%rcx
        #       while (i) { *out++ = *m++; --i }
        rep     movsb
        # comment:fp stack unchanged by fallthrough
#     bytesatleast64:
._bytesatleast64:
        #     x = x_backup
        movq    120(%rsp),%rdi
        #     in8 = j8
        movq    88(%rsp),%rsi
        #     *(uint64 *) (x + 32) = in8
        movq    %rsi,32(%rdi)
        #     r11 = r11_stack
        movq    0(%rsp),%r11
        #     r12 = r12_stack
        movq    8(%rsp),%r12
        #     r13 = r13_stack
        movq    16(%rsp),%r13
        #     r14 = r14_stack
        movq    24(%rsp),%r14
        #     r15 = r15_stack
        movq    32(%rsp),%r15
        #     rbx = rbx_stack
        movq    40(%rsp),%rbx
        #     rbp = rbp_stack
        movq    48(%rsp),%rbp
        # comment:fp stack unchanged by fallthrough
#     done:
._done:
        #     leave
        add     %r11,%rsp
        mov     %rdi,%rax
        mov     %rsi,%rdx
        ret
#   bytesatleast65:
._bytesatleast65:
        #   bytes -= 64
        sub     $64,%rdx
        #   out += 64
        add     $64,%rdi
        #   m += 64
        add     $64,%rsi
        # comment:fp stack unchanged by jump
        # goto bytesatleast1
        jmp     ._bytesatleast1
# enter ECRYPT_keysetup
.text
.p2align 5
.globl ECRYPT_keysetup
ECRYPT_keysetup:
        mov     %rsp,%r11
        and     $31,%r11
        add     $256,%r11
        sub     %r11,%rsp
        #   k = arg2
        mov     %rsi,%rsi
        #   kbits = arg3
        mov     %rdx,%rdx
        #   x = arg1
        mov     %rdi,%rdi
        #   in0 = *(uint64 *) (k + 0)
        movq    0(%rsi),%r8
        #   in2 = *(uint64 *) (k + 8)
        movq    8(%rsi),%r9
        #   *(uint64 *) (x + 4) = in0
        movq    %r8,4(%rdi)
        #   *(uint64 *) (x + 12) = in2
        movq    %r9,12(%rdi)
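        #   (the first 16 key bytes land at byte offsets 4 and 12 of x, i.e.
        #   state words 1..4; words 0, 5, 10 and 15 take the diagonal
        #   constants stored below)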
        #                    unsigned<? kbits - 256
        cmp     $256,%rdx
        # comment:fp stack unchanged by jump
        #   goto kbits128 if unsigned<
        jb      ._kbits128
#   kbits256:
._kbits256:
        #     in10 = *(uint64 *) (k + 16)
        movq    16(%rsi),%rdx
        #     in12 = *(uint64 *) (k + 24)
        movq    24(%rsi),%rsi
        #     *(uint64 *) (x + 44) = in10
        movq    %rdx,44(%rdi)
        #     *(uint64 *) (x + 52) = in12
        movq    %rsi,52(%rdi)
        #     in0 = 1634760805
        mov     $1634760805,%rsi
        #     in4 = 857760878
        mov     $857760878,%rdx
        #     in10 = 2036477234
        mov     $2036477234,%rcx
        #     in14 = 1797285236
        mov     $1797285236,%r8
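        #     (1634760805, 857760878, 2036477234 and 1797285236 are the
        #     little-endian words "expa", "nd 3", "2-by" and "te k", i.e. the
        #     standard Salsa20 "expand 32-byte k" constants)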
        #     *(uint32 *) (x + 0) = in0
        movl    %esi,0(%rdi)
        #     *(uint32 *) (x + 20) = in4
        movl    %edx,20(%rdi)
        #     *(uint32 *) (x + 40) = in10
        movl    %ecx,40(%rdi)
        #     *(uint32 *) (x + 60) = in14
        movl    %r8d,60(%rdi)
        # comment:fp stack unchanged by jump
        #   goto keysetupdone
        jmp     ._keysetupdone
#   kbits128:
._kbits128:
        #     in10 = *(uint64 *) (k + 0)
        movq    0(%rsi),%rdx
        #     in12 = *(uint64 *) (k + 8)
        movq    8(%rsi),%rsi
        #     *(uint64 *) (x + 44) = in10
        movq    %rdx,44(%rdi)
        #     *(uint64 *) (x + 52) = in12
        movq    %rsi,52(%rdi)
        #     in0 = 1634760805
        mov     $1634760805,%rsi
        #     in4 = 824206446
        mov     $824206446,%rdx
        #     in10 = 2036477238
        mov     $2036477238,%rcx
        #     in14 = 1797285236
        mov     $1797285236,%r8
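        #     (for a 128-bit key the 16 key bytes are used twice, and the
        #     constants decode to "expa", "nd 1", "6-by" and "te k", i.e. the
        #     "expand 16-byte k" constants)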
        #     *(uint32 *) (x + 0) = in0
        movl    %esi,0(%rdi)
        #     *(uint32 *) (x + 20) = in4
        movl    %edx,20(%rdi)
        #     *(uint32 *) (x + 40) = in10
        movl    %ecx,40(%rdi)
        #     *(uint32 *) (x + 60) = in14
        movl    %r8d,60(%rdi)
#   keysetupdone:
._keysetupdone:
        # leave
        add     %r11,%rsp
        mov     %rdi,%rax
        mov     %rsi,%rdx
        ret
# enter ECRYPT_ivsetup
.text
.p2align 5
.globl ECRYPT_ivsetup
ECRYPT_ivsetup:
        mov     %rsp,%r11
        and     $31,%r11
        add     $256,%r11
        sub     %r11,%rsp
        #   iv = arg2
        mov     %rsi,%rsi
        #   x = arg1
        mov     %rdi,%rdi
        #   in6 = *(uint64 *) (iv + 0)
        movq    0(%rsi),%rsi
        #   in8 = 0
        mov     $0,%r8
        #   *(uint64 *) (x + 24) = in6
        movq    %rsi,24(%rdi)
        #   *(uint64 *) (x + 32) = in8
        movq    %r8,32(%rdi)
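        #   (the 8-byte IV fills state words 6..7 at byte offset 24, and the
        #   64-bit block counter at words 8..9, byte offset 32, is reset to
        #   zero)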
        # leave
        add     %r11,%rsp
        mov     %rdi,%rax
        mov     %rsi,%rdx
        ret