linux/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S
<<
>>
Prefs
   1/*
   2 * Buffer submit code for multi buffer SHA1 algorithm
   3 *
   4 * This file is provided under a dual BSD/GPLv2 license.  When using or
   5 * redistributing this file, you may do so under either license.
   6 *
   7 * GPL LICENSE SUMMARY
   8 *
   9 *  Copyright(c) 2014 Intel Corporation.
  10 *
  11 *  This program is free software; you can redistribute it and/or modify
  12 *  it under the terms of version 2 of the GNU General Public License as
  13 *  published by the Free Software Foundation.
  14 *
  15 *  This program is distributed in the hope that it will be useful, but
  16 *  WITHOUT ANY WARRANTY; without even the implied warranty of
  17 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18 *  General Public License for more details.
  19 *
  20 *  Contact Information:
  21 *      James Guilford <james.guilford@intel.com>
  22 *      Tim Chen <tim.c.chen@linux.intel.com>
  23 *
  24 *  BSD LICENSE
  25 *
  26 *  Copyright(c) 2014 Intel Corporation.
  27 *
  28 *  Redistribution and use in source and binary forms, with or without
  29 *  modification, are permitted provided that the following conditions
  30 *  are met:
  31 *
  32 *    * Redistributions of source code must retain the above copyright
  33 *      notice, this list of conditions and the following disclaimer.
  34 *    * Redistributions in binary form must reproduce the above copyright
  35 *      notice, this list of conditions and the following disclaimer in
  36 *      the documentation and/or other materials provided with the
  37 *      distribution.
  38 *    * Neither the name of Intel Corporation nor the names of its
  39 *      contributors may be used to endorse or promote products derived
  40 *      from this software without specific prior written permission.
  41 *
  42 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  43 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  44 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  45 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  46 *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  47 *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  48 *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  49 *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  50 *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  51 *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  52 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  53 */
  54
  55#include <linux/linkage.h>
  56#include <asm/frame.h>
  57#include "sha1_mb_mgr_datastruct.S"
  58
  59
  60.extern sha1_x8_avx
  61
  62# LINUX register definitions
  63arg1    = %rdi
  64arg2    = %rsi
  65size_offset     = %rcx
  66tmp2            = %rcx
  67extra_blocks    = %rdx
  68
  69# Common definitions
  70#define state   arg1
  71#define job     %rsi
  72#define len2    arg2
  73#define p2      arg2
  74
  75# idx must be a register not clobberred by sha1_x8_avx2
  76idx             = %r8
  77DWORD_idx       = %r8d
  78last_len        = %r8
  79
  80p               = %r11
  81start_offset    = %r11
  82
  83unused_lanes    = %rbx
  84BYTE_unused_lanes = %bl
  85
  86job_rax         = %rax
  87len             = %rax
  88DWORD_len       = %eax
  89
  90lane            = %r12
  91tmp3            = %r12
  92
  93tmp             = %r9
  94DWORD_tmp       = %r9d
  95
  96lane_data       = %r10
  97
  98# JOB* submit_mb_mgr_submit_avx2(MB_MGR *state, job_sha1 *job)
  99# arg 1 : rcx : state
 100# arg 2 : rdx : job
 101ENTRY(sha1_mb_mgr_submit_avx2)
 102        FRAME_BEGIN
 103        push    %rbx
 104        push    %r12
 105
 106        mov     _unused_lanes(state), unused_lanes
 107        mov     unused_lanes, lane
 108        and     $0xF, lane
 109        shr     $4, unused_lanes
 110        imul    $_LANE_DATA_size, lane, lane_data
 111        movl    $STS_BEING_PROCESSED, _status(job)
 112        lea     _ldata(state, lane_data), lane_data
 113        mov     unused_lanes, _unused_lanes(state)
 114        movl    _len(job),  DWORD_len
 115
 116        mov     job, _job_in_lane(lane_data)
 117        shl     $4, len
 118        or      lane, len
 119
 120        movl    DWORD_len,  _lens(state , lane, 4)
 121
 122        # Load digest words from result_digest
 123        vmovdqu _result_digest(job), %xmm0
 124        mov     _result_digest+1*16(job), DWORD_tmp
 125        vmovd    %xmm0, _args_digest(state, lane, 4)
 126        vpextrd  $1, %xmm0, _args_digest+1*32(state , lane, 4)
 127        vpextrd  $2, %xmm0, _args_digest+2*32(state , lane, 4)
 128        vpextrd  $3, %xmm0, _args_digest+3*32(state , lane, 4)
 129        movl    DWORD_tmp, _args_digest+4*32(state , lane, 4)
 130
 131        mov     _buffer(job), p
 132        mov     p, _args_data_ptr(state, lane, 8)
 133
 134        cmp     $0xF, unused_lanes
 135        jne     return_null
 136
 137start_loop:
 138        # Find min length
 139        vmovdqa _lens(state), %xmm0
 140        vmovdqa _lens+1*16(state), %xmm1
 141
 142        vpminud %xmm1, %xmm0, %xmm2        # xmm2 has {D,C,B,A}
 143        vpalignr $8, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,D,C}
 144        vpminud %xmm3, %xmm2, %xmm2        # xmm2 has {x,x,E,F}
 145        vpalignr $4, %xmm2, %xmm3, %xmm3   # xmm3 has {x,x,x,E}
 146        vpminud %xmm3, %xmm2, %xmm2        # xmm2 has min value in low dword
 147
 148        vmovd   %xmm2, DWORD_idx
 149        mov    idx, len2
 150        and    $0xF, idx
 151        shr    $4, len2
 152        jz     len_is_0
 153
 154        vpand   clear_low_nibble(%rip), %xmm2, %xmm2
 155        vpshufd $0, %xmm2, %xmm2
 156
 157        vpsubd  %xmm2, %xmm0, %xmm0
 158        vpsubd  %xmm2, %xmm1, %xmm1
 159
 160        vmovdqa %xmm0, _lens + 0*16(state)
 161        vmovdqa %xmm1, _lens + 1*16(state)
 162
 163
 164        # "state" and "args" are the same address, arg1
 165        # len is arg2
 166        call    sha1_x8_avx2
 167
 168        # state and idx are intact
 169
 170len_is_0:
 171        # process completed job "idx"
 172        imul    $_LANE_DATA_size, idx, lane_data
 173        lea     _ldata(state, lane_data), lane_data
 174
 175        mov     _job_in_lane(lane_data), job_rax
 176        mov     _unused_lanes(state), unused_lanes
 177        movq    $0, _job_in_lane(lane_data)
 178        movl    $STS_COMPLETED, _status(job_rax)
 179        shl     $4, unused_lanes
 180        or      idx, unused_lanes
 181        mov     unused_lanes, _unused_lanes(state)
 182
 183        movl    $0xFFFFFFFF, _lens(state, idx, 4)
 184
 185        vmovd    _args_digest(state, idx, 4), %xmm0
 186        vpinsrd  $1, _args_digest+1*32(state , idx, 4), %xmm0, %xmm0
 187        vpinsrd  $2, _args_digest+2*32(state , idx, 4), %xmm0, %xmm0
 188        vpinsrd  $3, _args_digest+3*32(state , idx, 4), %xmm0, %xmm0
 189        movl     _args_digest+4*32(state, idx, 4), DWORD_tmp
 190
 191        vmovdqu  %xmm0, _result_digest(job_rax)
 192        movl    DWORD_tmp, _result_digest+1*16(job_rax)
 193
 194return:
 195        pop     %r12
 196        pop     %rbx
 197        FRAME_END
 198        ret
 199
 200return_null:
 201        xor     job_rax, job_rax
 202        jmp     return
 203
 204ENDPROC(sha1_mb_mgr_submit_avx2)
 205
 206.data
 207
 208.align 16
 209clear_low_nibble:
 210        .octa   0x000000000000000000000000FFFFFFF0
 211