linux/arch/x86/crypto/nh-sse2-x86_64.S
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0 */
   2/*
   3 * NH - ε-almost-universal hash function, x86_64 SSE2 accelerated
   4 *
   5 * Copyright 2018 Google LLC
   6 *
   7 * Author: Eric Biggers <ebiggers@google.com>
   8 */
   9
  10#include <linux/linkage.h>
  11
  12#define         PASS0_SUMS      %xmm0   /* pass 0 accumulator: two 64-bit partial sums */
  13#define         PASS1_SUMS      %xmm1   /* pass 1 accumulator: two 64-bit partial sums */
  14#define         PASS2_SUMS      %xmm2   /* pass 2 accumulator: two 64-bit partial sums */
  15#define         PASS3_SUMS      %xmm3   /* pass 3 accumulator: two 64-bit partial sums */
  16#define         K0              %xmm4   /* K0-K3: 16-byte key strides loaded from KEY; */
  17#define         K1              %xmm5   /* nh_sse2 passes them to _nh_stride in rotated */
  18#define         K2              %xmm6   /* order, so which alias feeds which pass changes */
  19#define         K3              %xmm7   /* from one stride to the next */
  20#define         T0              %xmm8   /* scratch, used only in the final sum at .Ldone */
  21#define         T1              %xmm9   /* scratch: message stride in _nh_stride, also used at .Ldone */
  22#define         T2              %xmm10  /* T2-T7: scratch registers written by _nh_stride */
  23#define         T3              %xmm11
  24#define         T4              %xmm12
  25#define         T5              %xmm13
  26#define         T6              %xmm14
  27#define         T7              %xmm15
  28#define         KEY             %rdi    /* 1st argument 'key'; advanced as key material is consumed */
  29#define         MESSAGE         %rsi    /* 2nd argument 'message'; advanced by the 64-byte loop */
  30#define         MESSAGE_LEN     %rdx    /* 3rd argument 'message_len'; modified in place as a counter */
  31#define         HASH            %rcx    /* 4th argument 'hash'; receives the four sums */
  32
  33.macro _nh_stride       k0, k1, k2, k3, offset
            // Process one 16-byte message stride for all four NH passes.
            //
            // On entry k0, k1 and k2 hold the key strides for passes 0, 1 and 2;
            // the stride for pass 3 is loaded here from offset(KEY) into k3.
            // For each pass the message stride is added to the key stride as
            // four 32-bit words, then word 0 * word 2 and word 1 * word 3 are
            // formed as unsigned 64-bit products and added to the two 64-bit
            // lanes of that pass's accumulator (PASS0_SUMS..PASS3_SUMS).
            //
            // On exit k0 has been destroyed, k1 and k2 are unchanged and k3
            // holds the newly loaded key stride. T1-T7 are overwritten.
            // KEY and MESSAGE themselves are not modified.
  34
  35        // Load next message stride
  36        movdqu          \offset(MESSAGE), T1
  37
  38        // Load next key stride
  39        movdqu          \offset(KEY), \k3
  40
  41        // Add message words to key words
            // (T1 still holds the raw message until the second paddd, so the
            // two copies and the add into k0 have to come before it.)
  42        movdqa          T1, T2          // T2 = message, for pass 2
  43        movdqa          T1, T3          // T3 = message, for pass 3
  44        paddd           T1, \k0    // reuse k0 to avoid a move
  45        paddd           \k1, T1         // T1 = message + k1 (pass 1)
  46        paddd           \k2, T2         // T2 = message + k2 (pass 2)
  47        paddd           \k3, T3         // T3 = message + k3 (pass 3)
  48
  49        // Multiply 32x32 => 64 and accumulate
            // pmuludq multiplies 32-bit words 0 and 2 of its two operands.
            // Shuffle $0x10 selects source words (0, 0, 1, 0), putting words
            // 0 and 1 into those positions; shuffle $0x32 selects (2, 0, 3, 0),
            // putting words 2 and 3 there. The products are therefore
            // (word0 * word2, word1 * word3) of each message+key sum.
  50        pshufd          $0x10, \k0, T4
  51        pshufd          $0x32, \k0, \k0
  52        pshufd          $0x10, T1, T5
  53        pshufd          $0x32, T1, T1
  54        pshufd          $0x10, T2, T6
  55        pshufd          $0x32, T2, T2
  56        pshufd          $0x10, T3, T7
  57        pshufd          $0x32, T3, T3
  58        pmuludq         T4, \k0         // pass 0 products
  59        pmuludq         T5, T1          // pass 1 products
  60        pmuludq         T6, T2          // pass 2 products
  61        pmuludq         T7, T3          // pass 3 products
  62        paddq           \k0, PASS0_SUMS
  63        paddq           T1, PASS1_SUMS
  64        paddq           T2, PASS2_SUMS
  65        paddq           T3, PASS3_SUMS
  66.endm
  67
  68/*
  69 * void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
  70 *              u8 hash[NH_HASH_BYTES])
  71 *
  72 * It's guaranteed that message_len % 16 == 0.
     *
     * Runs four NH passes over the message in parallel. Pass i uses key
     * material starting 16 * i bytes into 'key', and the key position moves
     * forward 16 bytes for every 16-byte message stride. The four 64-bit sums
     * are stored to 'hash' in pass order, 32 bytes in total.
     *
     * In:    KEY (%rdi)         = key; 0x30 + message_len bytes are read
     *        MESSAGE (%rsi)     = message; message_len bytes are read
     *        MESSAGE_LEN (%rdx) = message_len
     *        HASH (%rcx)        = hash; 32 bytes are written
     * Clobb: %rdi, %rsi, %rdx, %xmm0-%xmm15, flags
  73 */
  74ENTRY(nh_sse2)
  75
            // Preload the key strides for passes 0-2, then step KEY past them
            // so that offset(KEY) inside _nh_stride addresses the stride three
            // ahead of k0, which is the one pass 3 needs.
  76        movdqu          0x00(KEY), K0
  77        movdqu          0x10(KEY), K1
  78        movdqu          0x20(KEY), K2
  79        add             $0x30, KEY
            // Zero the four pass accumulators
  80        pxor            PASS0_SUMS, PASS0_SUMS
  81        pxor            PASS1_SUMS, PASS1_SUMS
  82        pxor            PASS2_SUMS, PASS2_SUMS
  83        pxor            PASS3_SUMS, PASS3_SUMS
  84
            // MESSAGE_LEN is kept biased by -0x40 while the main loop runs, so
            // the signed result of each subtraction tells whether another full
            // 64-byte group is available.
  85        sub             $0x40, MESSAGE_LEN
  86        jl              .Lloop4_done    // fewer than 64 bytes in total
  87.Lloop4:
            // Four strides (64 bytes) per iteration. The key aliases rotate by
            // one position per call because _nh_stride destroys its k0 and
            // loads a fresh stride into its k3; after four calls K0-K3 are back
            // in their starting roles.
  88        _nh_stride      K0, K1, K2, K3, 0x00
  89        _nh_stride      K1, K2, K3, K0, 0x10
  90        _nh_stride      K2, K3, K0, K1, 0x20
  91        _nh_stride      K3, K0, K1, K2, 0x30
  92        add             $0x40, KEY
  93        add             $0x40, MESSAGE
  94        sub             $0x40, MESSAGE_LEN
  95        jge             .Lloop4         // at least 64 more bytes remain
  96
  97.Lloop4_done:
            // MESSAGE_LEN is negative here (remaining bytes - 0x40); its low
            // six bits are the number of bytes left over. With message_len a
            // multiple of 16 that is 0, 16, 32 or 48, so at most three more
            // strides follow, continuing the same key rotation.
  98        and             $0x3f, MESSAGE_LEN
  99        jz              .Ldone
 100        _nh_stride      K0, K1, K2, K3, 0x00
 101
 102        sub             $0x10, MESSAGE_LEN
 103        jz              .Ldone
 104        _nh_stride      K1, K2, K3, K0, 0x10
 105
 106        sub             $0x10, MESSAGE_LEN
 107        jz              .Ldone
 108        _nh_stride      K2, K3, K0, K1, 0x20
 109
 110.Ldone:
 111        // Sum the accumulators for each pass, then store the sums to 'hash'
            // In the notes below _A is the low and _B the high 64-bit lane of
            // an accumulator. Pairing the _A lanes and the _B lanes of two
            // passes lets one paddq produce both per-pass totals at once.
 112        movdqa          PASS0_SUMS, T0
 113        movdqa          PASS2_SUMS, T1
 114        punpcklqdq      PASS1_SUMS, T0          // => (PASS0_SUM_A PASS1_SUM_A)
 115        punpcklqdq      PASS3_SUMS, T1          // => (PASS2_SUM_A PASS3_SUM_A)
 116        punpckhqdq      PASS1_SUMS, PASS0_SUMS  // => (PASS0_SUM_B PASS1_SUM_B)
 117        punpckhqdq      PASS3_SUMS, PASS2_SUMS  // => (PASS2_SUM_B PASS3_SUM_B)
 118        paddq           PASS0_SUMS, T0          // T0 = (pass 0 total, pass 1 total)
 119        paddq           PASS2_SUMS, T1          // T1 = (pass 2 total, pass 3 total)
 120        movdqu          T0, 0x00(HASH)
 121        movdqu          T1, 0x10(HASH)
 122        ret
 123ENDPROC(nh_sse2)
 124