/*
 * linux/arch/x86/crypto/aegis128-aesni-asm.S
 *
 * AES-NI + SSE2 implementation of AEGIS-128
 *
 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */

  12#include <linux/linkage.h>
  13#include <asm/frame.h>
  14
  15#define STATE0  %xmm0
  16#define STATE1  %xmm1
  17#define STATE2  %xmm2
  18#define STATE3  %xmm3
  19#define STATE4  %xmm4
  20#define KEY     %xmm5
  21#define MSG     %xmm5
  22#define T0      %xmm6
  23#define T1      %xmm7
  24
  25#define STATEP  %rdi
  26#define LEN     %rsi
  27#define SRC     %rdx
  28#define DST     %rcx
  29
  30.section .rodata.cst16.aegis128_const, "aM", @progbits, 32
  31.align 16
  32.Laegis128_const_0:
  33        .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
  34        .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
  35.Laegis128_const_1:
  36        .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
  37        .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
  38
  39.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16
  40.align 16
  41.Laegis128_counter:
  42        .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
  43        .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
  44
  45.text
  46
  47/*
  48 * aegis128_update
  49 * input:
  50 *   STATE[0-4] - input state
  51 * output:
  52 *   STATE[0-4] - output state (shifted positions)
  53 * changed:
  54 *   T0
  55 */
  56.macro aegis128_update
  57        movdqa STATE4, T0
  58        aesenc STATE0, STATE4
  59        aesenc STATE1, STATE0
  60        aesenc STATE2, STATE1
  61        aesenc STATE3, STATE2
  62        aesenc T0,     STATE3
  63.endm
  64
  65/*
  66 * __load_partial: internal ABI
  67 * input:
  68 *   LEN - bytes
  69 *   SRC - src
  70 * output:
  71 *   MSG  - message block
  72 * changed:
  73 *   T0
  74 *   %r8
  75 *   %r9
  76 */
  77__load_partial:
  78        xor %r9d, %r9d
  79        pxor MSG, MSG
  80
  81        mov LEN, %r8
  82        and $0x1, %r8
  83        jz .Lld_partial_1
  84
  85        mov LEN, %r8
  86        and $0x1E, %r8
  87        add SRC, %r8
  88        mov (%r8), %r9b
  89
  90.Lld_partial_1:
  91        mov LEN, %r8
  92        and $0x2, %r8
  93        jz .Lld_partial_2
  94
  95        mov LEN, %r8
  96        and $0x1C, %r8
  97        add SRC, %r8
  98        shl $0x10, %r9
  99        mov (%r8), %r9w
 100
 101.Lld_partial_2:
 102        mov LEN, %r8
 103        and $0x4, %r8
 104        jz .Lld_partial_4
 105
 106        mov LEN, %r8
 107        and $0x18, %r8
 108        add SRC, %r8
 109        shl $32, %r9
 110        mov (%r8), %r8d
 111        xor %r8, %r9
 112
 113.Lld_partial_4:
 114        movq %r9, MSG
 115
 116        mov LEN, %r8
 117        and $0x8, %r8
 118        jz .Lld_partial_8
 119
 120        mov LEN, %r8
 121        and $0x10, %r8
 122        add SRC, %r8
 123        pslldq $8, MSG
 124        movq (%r8), T0
 125        pxor T0, MSG
 126
 127.Lld_partial_8:
 128        ret
 129ENDPROC(__load_partial)
 130
 131/*
 132 * __store_partial: internal ABI
 133 * input:
 134 *   LEN - bytes
 135 *   DST - dst
 136 * output:
 137 *   T0   - message block
 138 * changed:
 139 *   %r8
 140 *   %r9
 141 *   %r10
 142 */
 143__store_partial:
 144        mov LEN, %r8
 145        mov DST, %r9
 146
 147        movq T0, %r10
 148
 149        cmp $8, %r8
 150        jl .Lst_partial_8
 151
 152        mov %r10, (%r9)
 153        psrldq $8, T0
 154        movq T0, %r10
 155
 156        sub $8, %r8
 157        add $8, %r9
 158
 159.Lst_partial_8:
 160        cmp $4, %r8
 161        jl .Lst_partial_4
 162
 163        mov %r10d, (%r9)
 164        shr $32, %r10
 165
 166        sub $4, %r8
 167        add $4, %r9
 168
 169.Lst_partial_4:
 170        cmp $2, %r8
 171        jl .Lst_partial_2
 172
 173        mov %r10w, (%r9)
 174        shr $0x10, %r10
 175
 176        sub $2, %r8
 177        add $2, %r9
 178
 179.Lst_partial_2:
 180        cmp $1, %r8
 181        jl .Lst_partial_1
 182
 183        mov %r10b, (%r9)
 184
 185.Lst_partial_1:
 186        ret
 187ENDPROC(__store_partial)
 188
 189/*
 190 * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
 191 */
 192ENTRY(crypto_aegis128_aesni_init)
 193        FRAME_BEGIN
 194
 195        /* load IV: */
 196        movdqu (%rdx), T1
 197
 198        /* load key: */
 199        movdqa (%rsi), KEY
 200        pxor KEY, T1
 201        movdqa T1, STATE0
 202        movdqa KEY, STATE3
 203        movdqa KEY, STATE4
 204
 205        /* load the constants: */
 206        movdqa .Laegis128_const_0, STATE2
 207        movdqa .Laegis128_const_1, STATE1
 208        pxor STATE2, STATE3
 209        pxor STATE1, STATE4
 210
 211        /* update 10 times with KEY / KEY xor IV: */
 212        aegis128_update; pxor KEY, STATE4
 213        aegis128_update; pxor T1,  STATE3
 214        aegis128_update; pxor KEY, STATE2
 215        aegis128_update; pxor T1,  STATE1
 216        aegis128_update; pxor KEY, STATE0
 217        aegis128_update; pxor T1,  STATE4
 218        aegis128_update; pxor KEY, STATE3
 219        aegis128_update; pxor T1,  STATE2
 220        aegis128_update; pxor KEY, STATE1
 221        aegis128_update; pxor T1,  STATE0
 222
 223        /* store the state: */
 224        movdqu STATE0, 0x00(STATEP)
 225        movdqu STATE1, 0x10(STATEP)
 226        movdqu STATE2, 0x20(STATEP)
 227        movdqu STATE3, 0x30(STATEP)
 228        movdqu STATE4, 0x40(STATEP)
 229
 230        FRAME_END
 231        ret
 232ENDPROC(crypto_aegis128_aesni_init)
 233
 234/*
 235 * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
 236 *                               const void *data);
 237 */
 238ENTRY(crypto_aegis128_aesni_ad)
 239        FRAME_BEGIN
 240
 241        cmp $0x10, LEN
 242        jb .Lad_out
 243
 244        /* load the state: */
 245        movdqu 0x00(STATEP), STATE0
 246        movdqu 0x10(STATEP), STATE1
 247        movdqu 0x20(STATEP), STATE2
 248        movdqu 0x30(STATEP), STATE3
 249        movdqu 0x40(STATEP), STATE4
 250
 251        mov SRC, %r8
 252        and $0xF, %r8
 253        jnz .Lad_u_loop
 254
 255.align 8
 256.Lad_a_loop:
 257        movdqa 0x00(SRC), MSG
 258        aegis128_update
 259        pxor MSG, STATE4
 260        sub $0x10, LEN
 261        cmp $0x10, LEN
 262        jl .Lad_out_1
 263
 264        movdqa 0x10(SRC), MSG
 265        aegis128_update
 266        pxor MSG, STATE3
 267        sub $0x10, LEN
 268        cmp $0x10, LEN
 269        jl .Lad_out_2
 270
 271        movdqa 0x20(SRC), MSG
 272        aegis128_update
 273        pxor MSG, STATE2
 274        sub $0x10, LEN
 275        cmp $0x10, LEN
 276        jl .Lad_out_3
 277
 278        movdqa 0x30(SRC), MSG
 279        aegis128_update
 280        pxor MSG, STATE1
 281        sub $0x10, LEN
 282        cmp $0x10, LEN
 283        jl .Lad_out_4
 284
 285        movdqa 0x40(SRC), MSG
 286        aegis128_update
 287        pxor MSG, STATE0
 288        sub $0x10, LEN
 289        cmp $0x10, LEN
 290        jl .Lad_out_0
 291
 292        add $0x50, SRC
 293        jmp .Lad_a_loop
 294
 295.align 8
 296.Lad_u_loop:
 297        movdqu 0x00(SRC), MSG
 298        aegis128_update
 299        pxor MSG, STATE4
 300        sub $0x10, LEN
 301        cmp $0x10, LEN
 302        jl .Lad_out_1
 303
 304        movdqu 0x10(SRC), MSG
 305        aegis128_update
 306        pxor MSG, STATE3
 307        sub $0x10, LEN
 308        cmp $0x10, LEN
 309        jl .Lad_out_2
 310
 311        movdqu 0x20(SRC), MSG
 312        aegis128_update
 313        pxor MSG, STATE2
 314        sub $0x10, LEN
 315        cmp $0x10, LEN
 316        jl .Lad_out_3
 317
 318        movdqu 0x30(SRC), MSG
 319        aegis128_update
 320        pxor MSG, STATE1
 321        sub $0x10, LEN
 322        cmp $0x10, LEN
 323        jl .Lad_out_4
 324
 325        movdqu 0x40(SRC), MSG
 326        aegis128_update
 327        pxor MSG, STATE0
 328        sub $0x10, LEN
 329        cmp $0x10, LEN
 330        jl .Lad_out_0
 331
 332        add $0x50, SRC
 333        jmp .Lad_u_loop
 334
 335        /* store the state: */
 336.Lad_out_0:
 337        movdqu STATE0, 0x00(STATEP)
 338        movdqu STATE1, 0x10(STATEP)
 339        movdqu STATE2, 0x20(STATEP)
 340        movdqu STATE3, 0x30(STATEP)
 341        movdqu STATE4, 0x40(STATEP)
 342        FRAME_END
 343        ret
 344
 345.Lad_out_1:
 346        movdqu STATE4, 0x00(STATEP)
 347        movdqu STATE0, 0x10(STATEP)
 348        movdqu STATE1, 0x20(STATEP)
 349        movdqu STATE2, 0x30(STATEP)
 350        movdqu STATE3, 0x40(STATEP)
 351        FRAME_END
 352        ret
 353
 354.Lad_out_2:
 355        movdqu STATE3, 0x00(STATEP)
 356        movdqu STATE4, 0x10(STATEP)
 357        movdqu STATE0, 0x20(STATEP)
 358        movdqu STATE1, 0x30(STATEP)
 359        movdqu STATE2, 0x40(STATEP)
 360        FRAME_END
 361        ret
 362
 363.Lad_out_3:
 364        movdqu STATE2, 0x00(STATEP)
 365        movdqu STATE3, 0x10(STATEP)
 366        movdqu STATE4, 0x20(STATEP)
 367        movdqu STATE0, 0x30(STATEP)
 368        movdqu STATE1, 0x40(STATEP)
 369        FRAME_END
 370        ret
 371
 372.Lad_out_4:
 373        movdqu STATE1, 0x00(STATEP)
 374        movdqu STATE2, 0x10(STATEP)
 375        movdqu STATE3, 0x20(STATEP)
 376        movdqu STATE4, 0x30(STATEP)
 377        movdqu STATE0, 0x40(STATEP)
 378        FRAME_END
 379        ret
 380
 381.Lad_out:
 382        FRAME_END
 383        ret
 384ENDPROC(crypto_aegis128_aesni_ad)
 385
 386.macro encrypt_block a s0 s1 s2 s3 s4 i
 387        movdq\a (\i * 0x10)(SRC), MSG
 388        movdqa MSG, T0
 389        pxor \s1, T0
 390        pxor \s4, T0
 391        movdqa \s2, T1
 392        pand \s3, T1
 393        pxor T1, T0
 394        movdq\a T0, (\i * 0x10)(DST)
 395
 396        aegis128_update
 397        pxor MSG, \s4
 398
 399        sub $0x10, LEN
 400        cmp $0x10, LEN
 401        jl .Lenc_out_\i
 402.endm
 403
 404/*
 405 * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
 406 *                                const void *src, void *dst);
 407 */
 408ENTRY(crypto_aegis128_aesni_enc)
 409        FRAME_BEGIN
 410
 411        cmp $0x10, LEN
 412        jb .Lenc_out
 413
 414        /* load the state: */
 415        movdqu 0x00(STATEP), STATE0
 416        movdqu 0x10(STATEP), STATE1
 417        movdqu 0x20(STATEP), STATE2
 418        movdqu 0x30(STATEP), STATE3
 419        movdqu 0x40(STATEP), STATE4
 420
 421        mov  SRC,  %r8
 422        or   DST,  %r8
 423        and $0xF, %r8
 424        jnz .Lenc_u_loop
 425
 426.align 8
 427.Lenc_a_loop:
 428        encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
 429        encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
 430        encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
 431        encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
 432        encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
 433
 434        add $0x50, SRC
 435        add $0x50, DST
 436        jmp .Lenc_a_loop
 437
 438.align 8
 439.Lenc_u_loop:
 440        encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
 441        encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
 442        encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
 443        encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
 444        encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
 445
 446        add $0x50, SRC
 447        add $0x50, DST
 448        jmp .Lenc_u_loop
 449
 450        /* store the state: */
 451.Lenc_out_0:
 452        movdqu STATE4, 0x00(STATEP)
 453        movdqu STATE0, 0x10(STATEP)
 454        movdqu STATE1, 0x20(STATEP)
 455        movdqu STATE2, 0x30(STATEP)
 456        movdqu STATE3, 0x40(STATEP)
 457        FRAME_END
 458        ret
 459
 460.Lenc_out_1:
 461        movdqu STATE3, 0x00(STATEP)
 462        movdqu STATE4, 0x10(STATEP)
 463        movdqu STATE0, 0x20(STATEP)
 464        movdqu STATE1, 0x30(STATEP)
 465        movdqu STATE2, 0x40(STATEP)
 466        FRAME_END
 467        ret
 468
 469.Lenc_out_2:
 470        movdqu STATE2, 0x00(STATEP)
 471        movdqu STATE3, 0x10(STATEP)
 472        movdqu STATE4, 0x20(STATEP)
 473        movdqu STATE0, 0x30(STATEP)
 474        movdqu STATE1, 0x40(STATEP)
 475        FRAME_END
 476        ret
 477
 478.Lenc_out_3:
 479        movdqu STATE1, 0x00(STATEP)
 480        movdqu STATE2, 0x10(STATEP)
 481        movdqu STATE3, 0x20(STATEP)
 482        movdqu STATE4, 0x30(STATEP)
 483        movdqu STATE0, 0x40(STATEP)
 484        FRAME_END
 485        ret
 486
 487.Lenc_out_4:
 488        movdqu STATE0, 0x00(STATEP)
 489        movdqu STATE1, 0x10(STATEP)
 490        movdqu STATE2, 0x20(STATEP)
 491        movdqu STATE3, 0x30(STATEP)
 492        movdqu STATE4, 0x40(STATEP)
 493        FRAME_END
 494        ret
 495
 496.Lenc_out:
 497        FRAME_END
 498        ret
 499ENDPROC(crypto_aegis128_aesni_enc)
 500
 501/*
 502 * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
 503 *                                     const void *src, void *dst);
 504 */
 505ENTRY(crypto_aegis128_aesni_enc_tail)
 506        FRAME_BEGIN
 507
 508        /* load the state: */
 509        movdqu 0x00(STATEP), STATE0
 510        movdqu 0x10(STATEP), STATE1
 511        movdqu 0x20(STATEP), STATE2
 512        movdqu 0x30(STATEP), STATE3
 513        movdqu 0x40(STATEP), STATE4
 514
 515        /* encrypt message: */
 516        call __load_partial
 517
 518        movdqa MSG, T0
 519        pxor STATE1, T0
 520        pxor STATE4, T0
 521        movdqa STATE2, T1
 522        pand STATE3, T1
 523        pxor T1, T0
 524
 525        call __store_partial
 526
 527        aegis128_update
 528        pxor MSG, STATE4
 529
 530        /* store the state: */
 531        movdqu STATE4, 0x00(STATEP)
 532        movdqu STATE0, 0x10(STATEP)
 533        movdqu STATE1, 0x20(STATEP)
 534        movdqu STATE2, 0x30(STATEP)
 535        movdqu STATE3, 0x40(STATEP)
 536
 537        FRAME_END
 538        ret
 539ENDPROC(crypto_aegis128_aesni_enc_tail)
 540
 541.macro decrypt_block a s0 s1 s2 s3 s4 i
 542        movdq\a (\i * 0x10)(SRC), MSG
 543        pxor \s1, MSG
 544        pxor \s4, MSG
 545        movdqa \s2, T1
 546        pand \s3, T1
 547        pxor T1, MSG
 548        movdq\a MSG, (\i * 0x10)(DST)
 549
 550        aegis128_update
 551        pxor MSG, \s4
 552
 553        sub $0x10, LEN
 554        cmp $0x10, LEN
 555        jl .Ldec_out_\i
 556.endm
 557
 558/*
 559 * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
 560 *                                const void *src, void *dst);
 561 */
 562ENTRY(crypto_aegis128_aesni_dec)
 563        FRAME_BEGIN
 564
 565        cmp $0x10, LEN
 566        jb .Ldec_out
 567
 568        /* load the state: */
 569        movdqu 0x00(STATEP), STATE0
 570        movdqu 0x10(STATEP), STATE1
 571        movdqu 0x20(STATEP), STATE2
 572        movdqu 0x30(STATEP), STATE3
 573        movdqu 0x40(STATEP), STATE4
 574
 575        mov  SRC, %r8
 576        or   DST, %r8
 577        and $0xF, %r8
 578        jnz .Ldec_u_loop
 579
 580.align 8
 581.Ldec_a_loop:
 582        decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
 583        decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
 584        decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
 585        decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
 586        decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4
 587
 588        add $0x50, SRC
 589        add $0x50, DST
 590        jmp .Ldec_a_loop
 591
 592.align 8
 593.Ldec_u_loop:
 594        decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
 595        decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
 596        decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
 597        decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
 598        decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4
 599
 600        add $0x50, SRC
 601        add $0x50, DST
 602        jmp .Ldec_u_loop
 603
 604        /* store the state: */
 605.Ldec_out_0:
 606        movdqu STATE4, 0x00(STATEP)
 607        movdqu STATE0, 0x10(STATEP)
 608        movdqu STATE1, 0x20(STATEP)
 609        movdqu STATE2, 0x30(STATEP)
 610        movdqu STATE3, 0x40(STATEP)
 611        FRAME_END
 612        ret
 613
 614.Ldec_out_1:
 615        movdqu STATE3, 0x00(STATEP)
 616        movdqu STATE4, 0x10(STATEP)
 617        movdqu STATE0, 0x20(STATEP)
 618        movdqu STATE1, 0x30(STATEP)
 619        movdqu STATE2, 0x40(STATEP)
 620        FRAME_END
 621        ret
 622
 623.Ldec_out_2:
 624        movdqu STATE2, 0x00(STATEP)
 625        movdqu STATE3, 0x10(STATEP)
 626        movdqu STATE4, 0x20(STATEP)
 627        movdqu STATE0, 0x30(STATEP)
 628        movdqu STATE1, 0x40(STATEP)
 629        FRAME_END
 630        ret
 631
 632.Ldec_out_3:
 633        movdqu STATE1, 0x00(STATEP)
 634        movdqu STATE2, 0x10(STATEP)
 635        movdqu STATE3, 0x20(STATEP)
 636        movdqu STATE4, 0x30(STATEP)
 637        movdqu STATE0, 0x40(STATEP)
 638        FRAME_END
 639        ret
 640
 641.Ldec_out_4:
 642        movdqu STATE0, 0x00(STATEP)
 643        movdqu STATE1, 0x10(STATEP)
 644        movdqu STATE2, 0x20(STATEP)
 645        movdqu STATE3, 0x30(STATEP)
 646        movdqu STATE4, 0x40(STATEP)
 647        FRAME_END
 648        ret
 649
 650.Ldec_out:
 651        FRAME_END
 652        ret
 653ENDPROC(crypto_aegis128_aesni_dec)
 654
 655/*
 656 * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
 657 *                                     const void *src, void *dst);
 658 */
 659ENTRY(crypto_aegis128_aesni_dec_tail)
 660        FRAME_BEGIN
 661
 662        /* load the state: */
 663        movdqu 0x00(STATEP), STATE0
 664        movdqu 0x10(STATEP), STATE1
 665        movdqu 0x20(STATEP), STATE2
 666        movdqu 0x30(STATEP), STATE3
 667        movdqu 0x40(STATEP), STATE4
 668
 669        /* decrypt message: */
 670        call __load_partial
 671
 672        pxor STATE1, MSG
 673        pxor STATE4, MSG
 674        movdqa STATE2, T1
 675        pand STATE3, T1
 676        pxor T1, MSG
 677
 678        movdqa MSG, T0
 679        call __store_partial
 680
 681        /* mask with byte count: */
 682        movq LEN, T0
 683        punpcklbw T0, T0
 684        punpcklbw T0, T0
 685        punpcklbw T0, T0
 686        punpcklbw T0, T0
 687        movdqa .Laegis128_counter, T1
 688        pcmpgtb T1, T0
 689        pand T0, MSG
 690
 691        aegis128_update
 692        pxor MSG, STATE4
 693
 694        /* store the state: */
 695        movdqu STATE4, 0x00(STATEP)
 696        movdqu STATE0, 0x10(STATEP)
 697        movdqu STATE1, 0x20(STATEP)
 698        movdqu STATE2, 0x30(STATEP)
 699        movdqu STATE3, 0x40(STATEP)
 700
 701        FRAME_END
 702        ret
 703ENDPROC(crypto_aegis128_aesni_dec_tail)
 704
 705/*
 706 * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
 707 *                                  u64 assoclen, u64 cryptlen);
 708 */
 709ENTRY(crypto_aegis128_aesni_final)
 710        FRAME_BEGIN
 711
 712        /* load the state: */
 713        movdqu 0x00(STATEP), STATE0
 714        movdqu 0x10(STATEP), STATE1
 715        movdqu 0x20(STATEP), STATE2
 716        movdqu 0x30(STATEP), STATE3
 717        movdqu 0x40(STATEP), STATE4
 718
 719        /* prepare length block: */
 720        movq %rdx, MSG
 721        movq %rcx, T0
 722        pslldq $8, T0
 723        pxor T0, MSG
 724        psllq $3, MSG /* multiply by 8 (to get bit count) */
 725
 726        pxor STATE3, MSG
 727
 728        /* update state: */
 729        aegis128_update; pxor MSG, STATE4
 730        aegis128_update; pxor MSG, STATE3
 731        aegis128_update; pxor MSG, STATE2
 732        aegis128_update; pxor MSG, STATE1
 733        aegis128_update; pxor MSG, STATE0
 734        aegis128_update; pxor MSG, STATE4
 735        aegis128_update; pxor MSG, STATE3
 736
 737        /* xor tag: */
 738        movdqu (%rsi), MSG
 739
 740        pxor STATE0, MSG
 741        pxor STATE1, MSG
 742        pxor STATE2, MSG
 743        pxor STATE3, MSG
 744        pxor STATE4, MSG
 745
 746        movdqu MSG, (%rsi)
 747
 748        FRAME_END
 749        ret
 750ENDPROC(crypto_aegis128_aesni_final)
 751