linux/arch/x86/crypto/ghash-clmulni-intel_asm.S
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
 * instructions. This file contains the accelerated part of the GHASH
 * implementation. More information about PCLMULQDQ can be found at:
 *
 * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
 *
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 *           Vinodh Gopal
 *           Erdinc Ozturk
 *           Deniz Karakoyunlu
 */
  15
  16#include <linux/linkage.h>
  17#include <asm/inst.h>
  18#include <asm/frame.h>
  19
  20.section        .rodata.cst16.bswap_mask, "aM", @progbits, 16
  21.align 16
  22.Lbswap_mask:
  23        .octa 0x000102030405060708090a0b0c0d0e0f
  24
  25#define DATA    %xmm0
  26#define SHASH   %xmm1
  27#define T1      %xmm2
  28#define T2      %xmm3
  29#define T3      %xmm4
  30#define BSWAP   %xmm5
  31#define IN1     %xmm6
  32
  33.text
  34
  35/*
  36 * __clmul_gf128mul_ble:        internal ABI
  37 * input:
  38 *      DATA:                   operand1
  39 *      SHASH:                  operand2, hash_key << 1 mod poly
  40 * output:
  41 *      DATA:                   operand1 * operand2 mod poly
  42 * changed:
  43 *      T1
  44 *      T2
  45 *      T3
  46 */
  47SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
  48        movaps DATA, T1
  49        pshufd $0b01001110, DATA, T2
  50        pshufd $0b01001110, SHASH, T3
  51        pxor DATA, T2
  52        pxor SHASH, T3
  53
  54        PCLMULQDQ 0x00 SHASH DATA       # DATA = a0 * b0
  55        PCLMULQDQ 0x11 SHASH T1         # T1 = a1 * b1
  56        PCLMULQDQ 0x00 T3 T2            # T2 = (a1 + a0) * (b1 + b0)
  57        pxor DATA, T2
  58        pxor T1, T2                     # T2 = a0 * b1 + a1 * b0
  59
  60        movaps T2, T3
  61        pslldq $8, T3
  62        psrldq $8, T2
  63        pxor T3, DATA
  64        pxor T2, T1                     # <T1:DATA> is result of
  65                                        # carry-less multiplication
  66
  67        # first phase of the reduction
  68        movaps DATA, T3
  69        psllq $1, T3
  70        pxor DATA, T3
  71        psllq $5, T3
  72        pxor DATA, T3
  73        psllq $57, T3
  74        movaps T3, T2
  75        pslldq $8, T2
  76        psrldq $8, T3
  77        pxor T2, DATA
  78        pxor T3, T1
  79
  80        # second phase of the reduction
  81        movaps DATA, T2
  82        psrlq $5, T2
  83        pxor DATA, T2
  84        psrlq $1, T2
  85        pxor DATA, T2
  86        psrlq $1, T2
  87        pxor T2, T1
  88        pxor T1, DATA
  89        ret
  90SYM_FUNC_END(__clmul_gf128mul_ble)
  91
  92/* void clmul_ghash_mul(char *dst, const u128 *shash) */
  93SYM_FUNC_START(clmul_ghash_mul)
  94        FRAME_BEGIN
  95        movups (%rdi), DATA
  96        movups (%rsi), SHASH
  97        movaps .Lbswap_mask, BSWAP
  98        PSHUFB_XMM BSWAP DATA
  99        call __clmul_gf128mul_ble
 100        PSHUFB_XMM BSWAP DATA
 101        movups DATA, (%rdi)
 102        FRAME_END
 103        ret
 104SYM_FUNC_END(clmul_ghash_mul)
 105
 106/*
 107 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 108 *                         const u128 *shash);
 109 */
 110SYM_FUNC_START(clmul_ghash_update)
 111        FRAME_BEGIN
 112        cmp $16, %rdx
 113        jb .Lupdate_just_ret    # check length
 114        movaps .Lbswap_mask, BSWAP
 115        movups (%rdi), DATA
 116        movups (%rcx), SHASH
 117        PSHUFB_XMM BSWAP DATA
 118.align 4
 119.Lupdate_loop:
 120        movups (%rsi), IN1
 121        PSHUFB_XMM BSWAP IN1
 122        pxor IN1, DATA
 123        call __clmul_gf128mul_ble
 124        sub $16, %rdx
 125        add $16, %rsi
 126        cmp $16, %rdx
 127        jge .Lupdate_loop
 128        PSHUFB_XMM BSWAP DATA
 129        movups DATA, (%rdi)
 130.Lupdate_just_ret:
 131        FRAME_END
 132        ret
 133SYM_FUNC_END(clmul_ghash_update)
 134