/*
 * crypto_helper.c - emulate v8 Crypto Extensions instructions
 *
 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 */

#include "qemu/osdep.h"

#include "cpu.h"
#include "exec/helper-proto.h"
#include "tcg/tcg-gvec-desc.h"
#include "crypto/aes.h"
#include "crypto/sm4.h"
#include "vec_internal.h"

  21union CRYPTO_STATE {
  22    uint8_t    bytes[16];
  23    uint32_t   words[4];
  24    uint64_t   l[2];
  25};
  26
  27#if HOST_BIG_ENDIAN
  28#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
  29#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
  30#else
  31#define CR_ST_BYTE(state, i)   ((state).bytes[i])
  32#define CR_ST_WORD(state, i)   ((state).words[i])
  33#endif
  34
  35/*
  36 * The caller has not been converted to full gvec, and so only
  37 * modifies the low 16 bytes of the vector register.
  38 */
  39static void clear_tail_16(void *vd, uint32_t desc)
  40{
  41    int opr_sz = simd_oprsz(desc);
  42    int max_sz = simd_maxsz(desc);
  43
  44    assert(opr_sz == 16);
  45    clear_tail(vd, opr_sz, max_sz);
  46}
  47
  48static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
  49                           uint64_t *rm, bool decrypt)
  50{
  51    static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
  52    static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
  53    union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
  54    union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
  55    int i;
  56
  57    /* xor state vector with round key */
  58    rk.l[0] ^= st.l[0];
  59    rk.l[1] ^= st.l[1];
  60
  61    /* combine ShiftRows operation and sbox substitution */
  62    for (i = 0; i < 16; i++) {
  63        CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
  64    }
  65
  66    rd[0] = st.l[0];
  67    rd[1] = st.l[1];
  68}
  69
  70void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
  71{
  72    intptr_t i, opr_sz = simd_oprsz(desc);
  73    bool decrypt = simd_data(desc);
  74
  75    for (i = 0; i < opr_sz; i += 16) {
  76        do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
  77    }
  78    clear_tail(vd, opr_sz, simd_maxsz(desc));
  79}
  80
  81static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
  82{
  83    static uint32_t const mc[][256] = { {
  84        /* MixColumns lookup table */
  85        0x00000000, 0x03010102, 0x06020204, 0x05030306,
  86        0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
  87        0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
  88        0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
  89        0x30101020, 0x33111122, 0x36121224, 0x35131326,
  90        0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
  91        0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
  92        0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
  93        0x60202040, 0x63212142, 0x66222244, 0x65232346,
  94        0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
  95        0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
  96        0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
  97        0x50303060, 0x53313162, 0x56323264, 0x55333366,
  98        0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
  99        0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
 100        0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
 101        0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
 102        0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
 103        0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
 104        0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
 105        0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
 106        0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
 107        0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
 108        0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
 109        0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
 110        0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
 111        0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
 112        0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
 113        0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
 114        0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
 115        0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
 116        0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
 117        0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
 118        0x97848413, 0x94858511, 0x91868617, 0x92878715,
 119        0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
 120        0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
 121        0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
 122        0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
 123        0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
 124        0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
 125        0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
 126        0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
 127        0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
 128        0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
 129        0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
 130        0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
 131        0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
 132        0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
 133        0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
 134        0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
 135        0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
 136        0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
 137        0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
 138        0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
 139        0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
 140        0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
 141        0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
 142        0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
 143        0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
 144        0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
 145        0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
 146        0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
 147        0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
 148        0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
 149    }, {
 150        /* Inverse MixColumns lookup table */
 151        0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
 152        0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
 153        0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
 154        0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
 155        0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
 156        0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
 157        0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
 158        0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
 159        0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
 160        0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
 161        0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
 162        0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
 163        0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
 164        0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
 165        0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
 166        0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
 167        0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
 168        0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
 169        0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
 170        0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
 171        0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
 172        0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
 173        0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
 174        0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
 175        0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
 176        0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
 177        0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
 178        0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
 179        0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
 180        0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
 181        0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
 182        0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
 183        0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
 184        0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
 185        0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
 186        0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
 187        0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
 188        0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
 189        0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
 190        0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
 191        0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
 192        0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
 193        0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
 194        0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
 195        0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
 196        0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
 197        0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
 198        0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
 199        0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
 200        0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
 201        0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
 202        0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
 203        0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
 204        0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
 205        0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
 206        0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
 207        0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
 208        0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
 209        0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
 210        0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
 211        0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
 212        0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
 213        0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
 214        0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
 215    } };
 216
 217    union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
 218    int i;
 219
 220    for (i = 0; i < 16; i += 4) {
 221        CR_ST_WORD(st, i >> 2) =
 222            mc[decrypt][CR_ST_BYTE(st, i)] ^
 223            rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
 224            rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
 225            rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
 226    }
 227
 228    rd[0] = st.l[0];
 229    rd[1] = st.l[1];
 230}
 231
 232void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
 233{
 234    intptr_t i, opr_sz = simd_oprsz(desc);
 235    bool decrypt = simd_data(desc);
 236
 237    for (i = 0; i < opr_sz; i += 16) {
 238        do_crypto_aesmc(vd + i, vm + i, decrypt);
 239    }
 240    clear_tail(vd, opr_sz, simd_maxsz(desc));
 241}
 242
 243/*
 244 * SHA-1 logical functions
 245 */
 246
 247static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
 248{
 249    return (x & (y ^ z)) ^ z;
 250}
 251
 252static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
 253{
 254    return x ^ y ^ z;
 255}
 256
 257static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
 258{
 259    return (x & y) | ((x | y) & z);
 260}
 261
 262void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
 263{
 264    uint64_t *d = vd, *n = vn, *m = vm;
 265    uint64_t d0, d1;
 266
 267    d0 = d[1] ^ d[0] ^ m[0];
 268    d1 = n[0] ^ d[1] ^ m[1];
 269    d[0] = d0;
 270    d[1] = d1;
 271
 272    clear_tail_16(vd, desc);
 273}
 274
 275static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
 276                                    uint64_t *rm, uint32_t desc,
 277                                    uint32_t (*fn)(union CRYPTO_STATE *d))
 278{
 279    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 280    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 281    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 282    int i;
 283
 284    for (i = 0; i < 4; i++) {
 285        uint32_t t = fn(&d);
 286
 287        t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
 288             + CR_ST_WORD(m, i);
 289
 290        CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
 291        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
 292        CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
 293        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
 294        CR_ST_WORD(d, 0) = t;
 295    }
 296    rd[0] = d.l[0];
 297    rd[1] = d.l[1];
 298
 299    clear_tail_16(rd, desc);
 300}
 301
 302static uint32_t do_sha1c(union CRYPTO_STATE *d)
 303{
 304    return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
 305}
 306
 307void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
 308{
 309    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
 310}
 311
 312static uint32_t do_sha1p(union CRYPTO_STATE *d)
 313{
 314    return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
 315}
 316
 317void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
 318{
 319    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
 320}
 321
 322static uint32_t do_sha1m(union CRYPTO_STATE *d)
 323{
 324    return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
 325}
 326
 327void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
 328{
 329    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
 330}
 331
 332void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
 333{
 334    uint64_t *rd = vd;
 335    uint64_t *rm = vm;
 336    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 337
 338    CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
 339    CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
 340
 341    rd[0] = m.l[0];
 342    rd[1] = m.l[1];
 343
 344    clear_tail_16(vd, desc);
 345}
 346
 347void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
 348{
 349    uint64_t *rd = vd;
 350    uint64_t *rm = vm;
 351    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 352    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 353
 354    CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
 355    CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
 356    CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
 357    CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
 358
 359    rd[0] = d.l[0];
 360    rd[1] = d.l[1];
 361
 362    clear_tail_16(vd, desc);
 363}
 364
 365/*
 366 * The SHA-256 logical functions, according to
 367 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
 368 */
 369
 370static uint32_t S0(uint32_t x)
 371{
 372    return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
 373}
 374
 375static uint32_t S1(uint32_t x)
 376{
 377    return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
 378}
 379
 380static uint32_t s0(uint32_t x)
 381{
 382    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
 383}
 384
 385static uint32_t s1(uint32_t x)
 386{
 387    return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
 388}
 389
 390void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
 391{
 392    uint64_t *rd = vd;
 393    uint64_t *rn = vn;
 394    uint64_t *rm = vm;
 395    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 396    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 397    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 398    int i;
 399
 400    for (i = 0; i < 4; i++) {
 401        uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
 402                     + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
 403                     + CR_ST_WORD(m, i);
 404
 405        CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
 406        CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
 407        CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
 408        CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
 409
 410        t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
 411             + S0(CR_ST_WORD(d, 0));
 412
 413        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
 414        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
 415        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
 416        CR_ST_WORD(d, 0) = t;
 417    }
 418
 419    rd[0] = d.l[0];
 420    rd[1] = d.l[1];
 421
 422    clear_tail_16(vd, desc);
 423}
 424
 425void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
 426{
 427    uint64_t *rd = vd;
 428    uint64_t *rn = vn;
 429    uint64_t *rm = vm;
 430    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 431    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 432    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 433    int i;
 434
 435    for (i = 0; i < 4; i++) {
 436        uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
 437                     + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
 438                     + CR_ST_WORD(m, i);
 439
 440        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
 441        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
 442        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
 443        CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
 444    }
 445
 446    rd[0] = d.l[0];
 447    rd[1] = d.l[1];
 448
 449    clear_tail_16(vd, desc);
 450}
 451
 452void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
 453{
 454    uint64_t *rd = vd;
 455    uint64_t *rm = vm;
 456    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 457    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 458
 459    CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
 460    CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
 461    CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
 462    CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
 463
 464    rd[0] = d.l[0];
 465    rd[1] = d.l[1];
 466
 467    clear_tail_16(vd, desc);
 468}
 469
 470void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
 471{
 472    uint64_t *rd = vd;
 473    uint64_t *rn = vn;
 474    uint64_t *rm = vm;
 475    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 476    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 477    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 478
 479    CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
 480    CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
 481    CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
 482    CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
 483
 484    rd[0] = d.l[0];
 485    rd[1] = d.l[1];
 486
 487    clear_tail_16(vd, desc);
 488}
 489
 490/*
 491 * The SHA-512 logical functions (same as above but using 64-bit operands)
 492 */
 493
 494static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
 495{
 496    return (x & (y ^ z)) ^ z;
 497}
 498
 499static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
 500{
 501    return (x & y) | ((x | y) & z);
 502}
 503
 504static uint64_t S0_512(uint64_t x)
 505{
 506    return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
 507}
 508
 509static uint64_t S1_512(uint64_t x)
 510{
 511    return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
 512}
 513
 514static uint64_t s0_512(uint64_t x)
 515{
 516    return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
 517}
 518
 519static uint64_t s1_512(uint64_t x)
 520{
 521    return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
 522}
 523
 524void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
 525{
 526    uint64_t *rd = vd;
 527    uint64_t *rn = vn;
 528    uint64_t *rm = vm;
 529    uint64_t d0 = rd[0];
 530    uint64_t d1 = rd[1];
 531
 532    d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
 533    d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
 534
 535    rd[0] = d0;
 536    rd[1] = d1;
 537
 538    clear_tail_16(vd, desc);
 539}
 540
 541void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
 542{
 543    uint64_t *rd = vd;
 544    uint64_t *rn = vn;
 545    uint64_t *rm = vm;
 546    uint64_t d0 = rd[0];
 547    uint64_t d1 = rd[1];
 548
 549    d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
 550    d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
 551
 552    rd[0] = d0;
 553    rd[1] = d1;
 554
 555    clear_tail_16(vd, desc);
 556}
 557
 558void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
 559{
 560    uint64_t *rd = vd;
 561    uint64_t *rn = vn;
 562    uint64_t d0 = rd[0];
 563    uint64_t d1 = rd[1];
 564
 565    d0 += s0_512(rd[1]);
 566    d1 += s0_512(rn[0]);
 567
 568    rd[0] = d0;
 569    rd[1] = d1;
 570
 571    clear_tail_16(vd, desc);
 572}
 573
 574void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
 575{
 576    uint64_t *rd = vd;
 577    uint64_t *rn = vn;
 578    uint64_t *rm = vm;
 579
 580    rd[0] += s1_512(rn[0]) + rm[0];
 581    rd[1] += s1_512(rn[1]) + rm[1];
 582
 583    clear_tail_16(vd, desc);
 584}
 585
 586void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
 587{
 588    uint64_t *rd = vd;
 589    uint64_t *rn = vn;
 590    uint64_t *rm = vm;
 591    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 592    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 593    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 594    uint32_t t;
 595
 596    t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
 597    CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
 598
 599    t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
 600    CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
 601
 602    t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
 603    CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
 604
 605    t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
 606    CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
 607
 608    rd[0] = d.l[0];
 609    rd[1] = d.l[1];
 610
 611    clear_tail_16(vd, desc);
 612}
 613
 614void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
 615{
 616    uint64_t *rd = vd;
 617    uint64_t *rn = vn;
 618    uint64_t *rm = vm;
 619    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 620    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 621    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 622    uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
 623
 624    CR_ST_WORD(d, 0) ^= t;
 625    CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
 626    CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
 627    CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
 628                        ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
 629
 630    rd[0] = d.l[0];
 631    rd[1] = d.l[1];
 632
 633    clear_tail_16(vd, desc);
 634}
 635
 636static inline void QEMU_ALWAYS_INLINE
 637crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
 638             uint32_t desc, uint32_t opcode)
 639{
 640    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 641    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 642    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 643    uint32_t imm2 = simd_data(desc);
 644    uint32_t t;
 645
 646    assert(imm2 < 4);
 647
 648    if (opcode == 0 || opcode == 2) {
 649        /* SM3TT1A, SM3TT2A */
 650        t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
 651    } else if (opcode == 1) {
 652        /* SM3TT1B */
 653        t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
 654    } else if (opcode == 3) {
 655        /* SM3TT2B */
 656        t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
 657    } else {
 658        qemu_build_not_reached();
 659    }
 660
 661    t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
 662
 663    CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
 664
 665    if (opcode < 2) {
 666        /* SM3TT1A, SM3TT1B */
 667        t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
 668
 669        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
 670    } else {
 671        /* SM3TT2A, SM3TT2B */
 672        t += CR_ST_WORD(n, 3);
 673        t ^= rol32(t, 9) ^ rol32(t, 17);
 674
 675        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
 676    }
 677
 678    CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
 679    CR_ST_WORD(d, 3) = t;
 680
 681    rd[0] = d.l[0];
 682    rd[1] = d.l[1];
 683
 684    clear_tail_16(rd, desc);
 685}
 686
 687#define DO_SM3TT(NAME, OPCODE) \
 688    void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
 689    { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }
 690
 691DO_SM3TT(crypto_sm3tt1a, 0)
 692DO_SM3TT(crypto_sm3tt1b, 1)
 693DO_SM3TT(crypto_sm3tt2a, 2)
 694DO_SM3TT(crypto_sm3tt2b, 3)
 695
 696#undef DO_SM3TT
 697
 698static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
 699{
 700    union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
 701    union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
 702    uint32_t t, i;
 703
 704    for (i = 0; i < 4; i++) {
 705        t = CR_ST_WORD(d, (i + 1) % 4) ^
 706            CR_ST_WORD(d, (i + 2) % 4) ^
 707            CR_ST_WORD(d, (i + 3) % 4) ^
 708            CR_ST_WORD(n, i);
 709
 710        t = sm4_sbox[t & 0xff] |
 711            sm4_sbox[(t >> 8) & 0xff] << 8 |
 712            sm4_sbox[(t >> 16) & 0xff] << 16 |
 713            sm4_sbox[(t >> 24) & 0xff] << 24;
 714
 715        CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
 716                            rol32(t, 24);
 717    }
 718
 719    rd[0] = d.l[0];
 720    rd[1] = d.l[1];
 721}
 722
 723void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
 724{
 725    intptr_t i, opr_sz = simd_oprsz(desc);
 726
 727    for (i = 0; i < opr_sz; i += 16) {
 728        do_crypto_sm4e(vd + i, vn + i, vm + i);
 729    }
 730    clear_tail(vd, opr_sz, simd_maxsz(desc));
 731}
 732
 733static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
 734{
 735    union CRYPTO_STATE d;
 736    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 737    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 738    uint32_t t, i;
 739
 740    d = n;
 741    for (i = 0; i < 4; i++) {
 742        t = CR_ST_WORD(d, (i + 1) % 4) ^
 743            CR_ST_WORD(d, (i + 2) % 4) ^
 744            CR_ST_WORD(d, (i + 3) % 4) ^
 745            CR_ST_WORD(m, i);
 746
 747        t = sm4_sbox[t & 0xff] |
 748            sm4_sbox[(t >> 8) & 0xff] << 8 |
 749            sm4_sbox[(t >> 16) & 0xff] << 16 |
 750            sm4_sbox[(t >> 24) & 0xff] << 24;
 751
 752        CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
 753    }
 754
 755    rd[0] = d.l[0];
 756    rd[1] = d.l[1];
 757}
 758
 759void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
 760{
 761    intptr_t i, opr_sz = simd_oprsz(desc);
 762
 763    for (i = 0; i < opr_sz; i += 16) {
 764        do_crypto_sm4ekey(vd + i, vn + i, vm + i);
 765    }
 766    clear_tail(vd, opr_sz, simd_maxsz(desc));
 767}
 768
 769void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
 770{
 771    intptr_t i, opr_sz = simd_oprsz(desc);
 772    uint64_t *d = vd, *n = vn, *m = vm;
 773
 774    for (i = 0; i < opr_sz / 8; ++i) {
 775        d[i] = n[i] ^ rol64(m[i], 1);
 776    }
 777    clear_tail(vd, opr_sz, simd_maxsz(desc));
 778}
 779