qemu/target/arm/crypto_helper.c
<<
>>
Prefs
   1/*
   2 * crypto_helper.c - emulate v8 Crypto Extensions instructions
   3 *
   4 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 */
  11
  12#include "qemu/osdep.h"
  13
  14#include "cpu.h"
  15#include "exec/helper-proto.h"
  16#include "tcg/tcg-gvec-desc.h"
  17#include "crypto/aes.h"
  18#include "vec_internal.h"
  19
/*
 * 128-bit working state used by the helpers in this file.  The three
 * members overlay the same 16 bytes, so a value stored through one view
 * can be picked apart through another.
 */
union CRYPTO_STATE {
    uint8_t    bytes[16];   /* state viewed as 16 individual bytes */
    uint32_t   words[4];    /* state viewed as four 32-bit words */
    uint64_t   l[2];        /* state viewed as two 64-bit halves */
};
  25
/*
 * Accessors for byte/word number i of a union CRYPTO_STATE.
 *
 * The helpers in this file fill the state through its 64-bit l[] members.
 * When HOST_WORDS_BIGENDIAN is defined the index is therefore remapped:
 * (15 - i) ^ 8 mirrors the byte index inside each 8-byte half and
 * (3 - i) ^ 2 swaps the two words inside each half.  Otherwise the index
 * is used unchanged.
 */
#ifdef HOST_WORDS_BIGENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[i])
#define CR_ST_WORD(state, i)   ((state).words[i])
#endif
  33
  34/*
  35 * The caller has not been converted to full gvec, and so only
  36 * modifies the low 16 bytes of the vector register.
  37 */
  38static void clear_tail_16(void *vd, uint32_t desc)
  39{
  40    int opr_sz = simd_oprsz(desc);
  41    int max_sz = simd_maxsz(desc);
  42
  43    assert(opr_sz == 16);
  44    clear_tail(vd, opr_sz, max_sz);
  45}
  46
  47static void do_crypto_aese(uint64_t *rd, uint64_t *rn,
  48                           uint64_t *rm, bool decrypt)
  49{
  50    static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
  51    static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
  52    union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
  53    union CRYPTO_STATE st = { .l = { rn[0], rn[1] } };
  54    int i;
  55
  56    /* xor state vector with round key */
  57    rk.l[0] ^= st.l[0];
  58    rk.l[1] ^= st.l[1];
  59
  60    /* combine ShiftRows operation and sbox substitution */
  61    for (i = 0; i < 16; i++) {
  62        CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
  63    }
  64
  65    rd[0] = st.l[0];
  66    rd[1] = st.l[1];
  67}
  68
  69void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
  70{
  71    intptr_t i, opr_sz = simd_oprsz(desc);
  72    bool decrypt = simd_data(desc);
  73
  74    for (i = 0; i < opr_sz; i += 16) {
  75        do_crypto_aese(vd + i, vn + i, vm + i, decrypt);
  76    }
  77    clear_tail(vd, opr_sz, simd_maxsz(desc));
  78}
  79
/*
 * Transform one 16-byte block read from @rm and store the result to @rd.
 *
 * The block is handled as four 32-bit words.  Each output word is the XOR
 * of four table entries, one per source byte of that word, with the
 * entries for the second, third and fourth byte rotated left by 8, 16 and
 * 24 bits respectively.  @decrypt selects the second (inverse) table.
 */
static void do_crypto_aesmc(uint64_t *rd, uint64_t *rm, bool decrypt)
{
    static uint32_t const mc[][256] = { {
        /* MixColumns lookup table */
        0x00000000, 0x03010102, 0x06020204, 0x05030306,
        0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
        0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
        0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
        0x30101020, 0x33111122, 0x36121224, 0x35131326,
        0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
        0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
        0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
        0x60202040, 0x63212142, 0x66222244, 0x65232346,
        0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
        0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
        0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
        0x50303060, 0x53313162, 0x56323264, 0x55333366,
        0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
        0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
        0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
        0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
        0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
        0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
        0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
        0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
        0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
        0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
        0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
        0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
        0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
        0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
        0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
        0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
        0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
        0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
        0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
        0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
        0x97848413, 0x94858511, 0x91868617, 0x92878715,
        0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
        0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
        0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
        0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
        0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
        0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
        0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
        0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
        0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
        0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
        0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
        0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
        0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
        0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
        0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
        0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
        0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
        0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
        0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
        0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
        0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
        0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
        0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
        0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
        0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
        0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
        0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
        0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
        0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
        0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
    }, {
        /* Inverse MixColumns lookup table */
        0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
        0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
        0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
        0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
        0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
        0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
        0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
        0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
        0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
        0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
        0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
        0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
        0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
        0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
        0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
        0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
        0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
        0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
        0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
        0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
        0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
        0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
        0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
        0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
        0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
        0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
        0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
        0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
        0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
        0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
        0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
        0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
        0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
        0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
        0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
        0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
        0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
        0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
        0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
        0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
        0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
        0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
        0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
        0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
        0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
        0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
        0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
        0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
        0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
        0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
        0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
        0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
        0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
        0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
        0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
        0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
        0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
        0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
        0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
        0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
        0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
        0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
        0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
        0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
    } };

    union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
    int i;

    /*
     * i steps through the block four bytes (one word) at a time.  The four
     * source bytes are read before the word containing them is replaced,
     * so the update can be done in place.
     */
    for (i = 0; i < 16; i += 4) {
        CR_ST_WORD(st, i >> 2) =
            mc[decrypt][CR_ST_BYTE(st, i)] ^
            rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
            rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
            rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
    }

    rd[0] = st.l[0];
    rd[1] = st.l[1];
}
 230
 231void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
 232{
 233    intptr_t i, opr_sz = simd_oprsz(desc);
 234    bool decrypt = simd_data(desc);
 235
 236    for (i = 0; i < opr_sz; i += 16) {
 237        do_crypto_aesmc(vd + i, vm + i, decrypt);
 238    }
 239    clear_tail(vd, opr_sz, simd_maxsz(desc));
 240}
 241
 242/*
 243 * SHA-1 logical functions
 244 */
 245
 246static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
 247{
 248    return (x & (y ^ z)) ^ z;
 249}
 250
 251static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
 252{
 253    return x ^ y ^ z;
 254}
 255
 256static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
 257{
 258    return (x & y) | ((x | y) & z);
 259}
 260
 261void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
 262{
 263    uint64_t *d = vd, *n = vn, *m = vm;
 264    uint64_t d0, d1;
 265
 266    d0 = d[1] ^ d[0] ^ m[0];
 267    d1 = n[0] ^ d[1] ^ m[1];
 268    d[0] = d0;
 269    d[1] = d1;
 270
 271    clear_tail_16(vd, desc);
 272}
 273
 274static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
 275                                    uint64_t *rm, uint32_t desc,
 276                                    uint32_t (*fn)(union CRYPTO_STATE *d))
 277{
 278    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 279    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 280    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 281    int i;
 282
 283    for (i = 0; i < 4; i++) {
 284        uint32_t t = fn(&d);
 285
 286        t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
 287             + CR_ST_WORD(m, i);
 288
 289        CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
 290        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
 291        CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
 292        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
 293        CR_ST_WORD(d, 0) = t;
 294    }
 295    rd[0] = d.l[0];
 296    rd[1] = d.l[1];
 297
 298    clear_tail_16(rd, desc);
 299}
 300
 301static uint32_t do_sha1c(union CRYPTO_STATE *d)
 302{
 303    return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
 304}
 305
 306void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
 307{
 308    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
 309}
 310
 311static uint32_t do_sha1p(union CRYPTO_STATE *d)
 312{
 313    return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
 314}
 315
 316void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
 317{
 318    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
 319}
 320
 321static uint32_t do_sha1m(union CRYPTO_STATE *d)
 322{
 323    return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
 324}
 325
 326void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
 327{
 328    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
 329}
 330
 331void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
 332{
 333    uint64_t *rd = vd;
 334    uint64_t *rm = vm;
 335    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 336
 337    CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
 338    CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
 339
 340    rd[0] = m.l[0];
 341    rd[1] = m.l[1];
 342
 343    clear_tail_16(vd, desc);
 344}
 345
 346void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
 347{
 348    uint64_t *rd = vd;
 349    uint64_t *rm = vm;
 350    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 351    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 352
 353    CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
 354    CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
 355    CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
 356    CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
 357
 358    rd[0] = d.l[0];
 359    rd[1] = d.l[1];
 360
 361    clear_tail_16(vd, desc);
 362}
 363
 364/*
 365 * The SHA-256 logical functions, according to
 366 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
 367 */
 368
 369static uint32_t S0(uint32_t x)
 370{
 371    return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
 372}
 373
 374static uint32_t S1(uint32_t x)
 375{
 376    return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
 377}
 378
 379static uint32_t s0(uint32_t x)
 380{
 381    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
 382}
 383
 384static uint32_t s1(uint32_t x)
 385{
 386    return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
 387}
 388
 389void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
 390{
 391    uint64_t *rd = vd;
 392    uint64_t *rn = vn;
 393    uint64_t *rm = vm;
 394    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 395    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 396    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 397    int i;
 398
 399    for (i = 0; i < 4; i++) {
 400        uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
 401                     + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
 402                     + CR_ST_WORD(m, i);
 403
 404        CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
 405        CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
 406        CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
 407        CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
 408
 409        t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
 410             + S0(CR_ST_WORD(d, 0));
 411
 412        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
 413        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
 414        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
 415        CR_ST_WORD(d, 0) = t;
 416    }
 417
 418    rd[0] = d.l[0];
 419    rd[1] = d.l[1];
 420
 421    clear_tail_16(vd, desc);
 422}
 423
 424void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
 425{
 426    uint64_t *rd = vd;
 427    uint64_t *rn = vn;
 428    uint64_t *rm = vm;
 429    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 430    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 431    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 432    int i;
 433
 434    for (i = 0; i < 4; i++) {
 435        uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
 436                     + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
 437                     + CR_ST_WORD(m, i);
 438
 439        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
 440        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
 441        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
 442        CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
 443    }
 444
 445    rd[0] = d.l[0];
 446    rd[1] = d.l[1];
 447
 448    clear_tail_16(vd, desc);
 449}
 450
 451void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
 452{
 453    uint64_t *rd = vd;
 454    uint64_t *rm = vm;
 455    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 456    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 457
 458    CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
 459    CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
 460    CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
 461    CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
 462
 463    rd[0] = d.l[0];
 464    rd[1] = d.l[1];
 465
 466    clear_tail_16(vd, desc);
 467}
 468
 469void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
 470{
 471    uint64_t *rd = vd;
 472    uint64_t *rn = vn;
 473    uint64_t *rm = vm;
 474    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 475    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 476    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 477
 478    CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
 479    CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
 480    CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
 481    CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
 482
 483    rd[0] = d.l[0];
 484    rd[1] = d.l[1];
 485
 486    clear_tail_16(vd, desc);
 487}
 488
 489/*
 490 * The SHA-512 logical functions (same as above but using 64-bit operands)
 491 */
 492
 493static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
 494{
 495    return (x & (y ^ z)) ^ z;
 496}
 497
 498static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
 499{
 500    return (x & y) | ((x | y) & z);
 501}
 502
 503static uint64_t S0_512(uint64_t x)
 504{
 505    return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
 506}
 507
 508static uint64_t S1_512(uint64_t x)
 509{
 510    return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
 511}
 512
 513static uint64_t s0_512(uint64_t x)
 514{
 515    return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
 516}
 517
 518static uint64_t s1_512(uint64_t x)
 519{
 520    return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
 521}
 522
 523void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
 524{
 525    uint64_t *rd = vd;
 526    uint64_t *rn = vn;
 527    uint64_t *rm = vm;
 528    uint64_t d0 = rd[0];
 529    uint64_t d1 = rd[1];
 530
 531    d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
 532    d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
 533
 534    rd[0] = d0;
 535    rd[1] = d1;
 536
 537    clear_tail_16(vd, desc);
 538}
 539
 540void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
 541{
 542    uint64_t *rd = vd;
 543    uint64_t *rn = vn;
 544    uint64_t *rm = vm;
 545    uint64_t d0 = rd[0];
 546    uint64_t d1 = rd[1];
 547
 548    d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
 549    d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
 550
 551    rd[0] = d0;
 552    rd[1] = d1;
 553
 554    clear_tail_16(vd, desc);
 555}
 556
 557void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
 558{
 559    uint64_t *rd = vd;
 560    uint64_t *rn = vn;
 561    uint64_t d0 = rd[0];
 562    uint64_t d1 = rd[1];
 563
 564    d0 += s0_512(rd[1]);
 565    d1 += s0_512(rn[0]);
 566
 567    rd[0] = d0;
 568    rd[1] = d1;
 569
 570    clear_tail_16(vd, desc);
 571}
 572
 573void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
 574{
 575    uint64_t *rd = vd;
 576    uint64_t *rn = vn;
 577    uint64_t *rm = vm;
 578
 579    rd[0] += s1_512(rn[0]) + rm[0];
 580    rd[1] += s1_512(rn[1]) + rm[1];
 581
 582    clear_tail_16(vd, desc);
 583}
 584
 585void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
 586{
 587    uint64_t *rd = vd;
 588    uint64_t *rn = vn;
 589    uint64_t *rm = vm;
 590    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 591    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 592    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 593    uint32_t t;
 594
 595    t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
 596    CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
 597
 598    t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
 599    CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
 600
 601    t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
 602    CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
 603
 604    t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
 605    CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
 606
 607    rd[0] = d.l[0];
 608    rd[1] = d.l[1];
 609
 610    clear_tail_16(vd, desc);
 611}
 612
 613void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
 614{
 615    uint64_t *rd = vd;
 616    uint64_t *rn = vn;
 617    uint64_t *rm = vm;
 618    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 619    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 620    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 621    uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
 622
 623    CR_ST_WORD(d, 0) ^= t;
 624    CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
 625    CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
 626    CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
 627                        ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
 628
 629    rd[0] = d.l[0];
 630    rd[1] = d.l[1];
 631
 632    clear_tail_16(vd, desc);
 633}
 634
 635static inline void QEMU_ALWAYS_INLINE
 636crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
 637             uint32_t desc, uint32_t opcode)
 638{
 639    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 640    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 641    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 642    uint32_t imm2 = simd_data(desc);
 643    uint32_t t;
 644
 645    assert(imm2 < 4);
 646
 647    if (opcode == 0 || opcode == 2) {
 648        /* SM3TT1A, SM3TT2A */
 649        t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
 650    } else if (opcode == 1) {
 651        /* SM3TT1B */
 652        t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
 653    } else if (opcode == 3) {
 654        /* SM3TT2B */
 655        t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
 656    } else {
 657        qemu_build_not_reached();
 658    }
 659
 660    t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
 661
 662    CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
 663
 664    if (opcode < 2) {
 665        /* SM3TT1A, SM3TT1B */
 666        t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
 667
 668        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
 669    } else {
 670        /* SM3TT2A, SM3TT2B */
 671        t += CR_ST_WORD(n, 3);
 672        t ^= rol32(t, 9) ^ rol32(t, 17);
 673
 674        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
 675    }
 676
 677    CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
 678    CR_ST_WORD(d, 3) = t;
 679
 680    rd[0] = d.l[0];
 681    rd[1] = d.l[1];
 682
 683    clear_tail_16(rd, desc);
 684}
 685
/*
 * Define one helper entry point per SM3TT variant.  Each generated helper
 * forwards its operands and descriptor to crypto_sm3tt() together with a
 * constant OPCODE that selects the variant.
 */
#define DO_SM3TT(NAME, OPCODE) \
    void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
    { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }

DO_SM3TT(crypto_sm3tt1a, 0)
DO_SM3TT(crypto_sm3tt1b, 1)
DO_SM3TT(crypto_sm3tt2a, 2)
DO_SM3TT(crypto_sm3tt2b, 3)

/* The generator macro is not needed past this point. */
#undef DO_SM3TT
 696
/*
 * Byte substitution table (256 entries, indexed by the input byte) used by
 * do_crypto_sm4e() and do_crypto_sm4ekey().
 */
static uint8_t const sm4_sbox[] = {
    0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
    0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
    0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
    0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
    0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
    0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
    0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
    0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
    0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
    0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
    0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
    0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
    0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
    0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
    0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
    0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
    0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
    0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
    0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
    0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
    0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
    0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
    0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
    0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
    0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
    0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
    0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
    0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
    0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
    0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
    0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
    0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
};
 731
 732static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
 733{
 734    union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
 735    union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
 736    uint32_t t, i;
 737
 738    for (i = 0; i < 4; i++) {
 739        t = CR_ST_WORD(d, (i + 1) % 4) ^
 740            CR_ST_WORD(d, (i + 2) % 4) ^
 741            CR_ST_WORD(d, (i + 3) % 4) ^
 742            CR_ST_WORD(n, i);
 743
 744        t = sm4_sbox[t & 0xff] |
 745            sm4_sbox[(t >> 8) & 0xff] << 8 |
 746            sm4_sbox[(t >> 16) & 0xff] << 16 |
 747            sm4_sbox[(t >> 24) & 0xff] << 24;
 748
 749        CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
 750                            rol32(t, 24);
 751    }
 752
 753    rd[0] = d.l[0];
 754    rd[1] = d.l[1];
 755}
 756
 757void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
 758{
 759    intptr_t i, opr_sz = simd_oprsz(desc);
 760
 761    for (i = 0; i < opr_sz; i += 16) {
 762        do_crypto_sm4e(vd + i, vn + i, vm + i);
 763    }
 764    clear_tail(vd, opr_sz, simd_maxsz(desc));
 765}
 766
 767static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
 768{
 769    union CRYPTO_STATE d;
 770    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 771    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 772    uint32_t t, i;
 773
 774    d = n;
 775    for (i = 0; i < 4; i++) {
 776        t = CR_ST_WORD(d, (i + 1) % 4) ^
 777            CR_ST_WORD(d, (i + 2) % 4) ^
 778            CR_ST_WORD(d, (i + 3) % 4) ^
 779            CR_ST_WORD(m, i);
 780
 781        t = sm4_sbox[t & 0xff] |
 782            sm4_sbox[(t >> 8) & 0xff] << 8 |
 783            sm4_sbox[(t >> 16) & 0xff] << 16 |
 784            sm4_sbox[(t >> 24) & 0xff] << 24;
 785
 786        CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
 787    }
 788
 789    rd[0] = d.l[0];
 790    rd[1] = d.l[1];
 791}
 792
 793void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
 794{
 795    intptr_t i, opr_sz = simd_oprsz(desc);
 796
 797    for (i = 0; i < opr_sz; i += 16) {
 798        do_crypto_sm4ekey(vd + i, vn + i, vm + i);
 799    }
 800    clear_tail(vd, opr_sz, simd_maxsz(desc));
 801}
 802
 803void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
 804{
 805    intptr_t i, opr_sz = simd_oprsz(desc);
 806    uint64_t *d = vd, *n = vn, *m = vm;
 807
 808    for (i = 0; i < opr_sz / 8; ++i) {
 809        d[i] = n[i] ^ rol64(m[i], 1);
 810    }
 811    clear_tail(vd, opr_sz, simd_maxsz(desc));
 812}
 813