qemu/target/arm/crypto_helper.c
<<
>>
Prefs
   1/*
   2 * crypto_helper.c - emulate v8 Crypto Extensions instructions
   3 *
   4 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2 of the License, or (at your option) any later version.
  10 */
  11
  12#include "qemu/osdep.h"
  13
  14#include "cpu.h"
  15#include "exec/exec-all.h"
  16#include "exec/helper-proto.h"
  17#include "crypto/aes.h"
  18
  19union CRYPTO_STATE {
  20    uint8_t    bytes[16];
  21    uint32_t   words[4];
  22    uint64_t   l[2];
  23};
  24
  25#ifdef HOST_WORDS_BIGENDIAN
  26#define CR_ST_BYTE(state, i)   (state.bytes[(15 - (i)) ^ 8])
  27#define CR_ST_WORD(state, i)   (state.words[(3 - (i)) ^ 2])
  28#else
  29#define CR_ST_BYTE(state, i)   (state.bytes[i])
  30#define CR_ST_WORD(state, i)   (state.words[i])
  31#endif
  32
  33void HELPER(crypto_aese)(void *vd, void *vm, uint32_t decrypt)
  34{
  35    static uint8_t const * const sbox[2] = { AES_sbox, AES_isbox };
  36    static uint8_t const * const shift[2] = { AES_shifts, AES_ishifts };
  37    uint64_t *rd = vd;
  38    uint64_t *rm = vm;
  39    union CRYPTO_STATE rk = { .l = { rm[0], rm[1] } };
  40    union CRYPTO_STATE st = { .l = { rd[0], rd[1] } };
  41    int i;
  42
  43    assert(decrypt < 2);
  44
  45    /* xor state vector with round key */
  46    rk.l[0] ^= st.l[0];
  47    rk.l[1] ^= st.l[1];
  48
  49    /* combine ShiftRows operation and sbox substitution */
  50    for (i = 0; i < 16; i++) {
  51        CR_ST_BYTE(st, i) = sbox[decrypt][CR_ST_BYTE(rk, shift[decrypt][i])];
  52    }
  53
  54    rd[0] = st.l[0];
  55    rd[1] = st.l[1];
  56}
  57
  58void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t decrypt)
  59{
  60    static uint32_t const mc[][256] = { {
  61        /* MixColumns lookup table */
  62        0x00000000, 0x03010102, 0x06020204, 0x05030306,
  63        0x0c040408, 0x0f05050a, 0x0a06060c, 0x0907070e,
  64        0x18080810, 0x1b090912, 0x1e0a0a14, 0x1d0b0b16,
  65        0x140c0c18, 0x170d0d1a, 0x120e0e1c, 0x110f0f1e,
  66        0x30101020, 0x33111122, 0x36121224, 0x35131326,
  67        0x3c141428, 0x3f15152a, 0x3a16162c, 0x3917172e,
  68        0x28181830, 0x2b191932, 0x2e1a1a34, 0x2d1b1b36,
  69        0x241c1c38, 0x271d1d3a, 0x221e1e3c, 0x211f1f3e,
  70        0x60202040, 0x63212142, 0x66222244, 0x65232346,
  71        0x6c242448, 0x6f25254a, 0x6a26264c, 0x6927274e,
  72        0x78282850, 0x7b292952, 0x7e2a2a54, 0x7d2b2b56,
  73        0x742c2c58, 0x772d2d5a, 0x722e2e5c, 0x712f2f5e,
  74        0x50303060, 0x53313162, 0x56323264, 0x55333366,
  75        0x5c343468, 0x5f35356a, 0x5a36366c, 0x5937376e,
  76        0x48383870, 0x4b393972, 0x4e3a3a74, 0x4d3b3b76,
  77        0x443c3c78, 0x473d3d7a, 0x423e3e7c, 0x413f3f7e,
  78        0xc0404080, 0xc3414182, 0xc6424284, 0xc5434386,
  79        0xcc444488, 0xcf45458a, 0xca46468c, 0xc947478e,
  80        0xd8484890, 0xdb494992, 0xde4a4a94, 0xdd4b4b96,
  81        0xd44c4c98, 0xd74d4d9a, 0xd24e4e9c, 0xd14f4f9e,
  82        0xf05050a0, 0xf35151a2, 0xf65252a4, 0xf55353a6,
  83        0xfc5454a8, 0xff5555aa, 0xfa5656ac, 0xf95757ae,
  84        0xe85858b0, 0xeb5959b2, 0xee5a5ab4, 0xed5b5bb6,
  85        0xe45c5cb8, 0xe75d5dba, 0xe25e5ebc, 0xe15f5fbe,
  86        0xa06060c0, 0xa36161c2, 0xa66262c4, 0xa56363c6,
  87        0xac6464c8, 0xaf6565ca, 0xaa6666cc, 0xa96767ce,
  88        0xb86868d0, 0xbb6969d2, 0xbe6a6ad4, 0xbd6b6bd6,
  89        0xb46c6cd8, 0xb76d6dda, 0xb26e6edc, 0xb16f6fde,
  90        0x907070e0, 0x937171e2, 0x967272e4, 0x957373e6,
  91        0x9c7474e8, 0x9f7575ea, 0x9a7676ec, 0x997777ee,
  92        0x887878f0, 0x8b7979f2, 0x8e7a7af4, 0x8d7b7bf6,
  93        0x847c7cf8, 0x877d7dfa, 0x827e7efc, 0x817f7ffe,
  94        0x9b80801b, 0x98818119, 0x9d82821f, 0x9e83831d,
  95        0x97848413, 0x94858511, 0x91868617, 0x92878715,
  96        0x8388880b, 0x80898909, 0x858a8a0f, 0x868b8b0d,
  97        0x8f8c8c03, 0x8c8d8d01, 0x898e8e07, 0x8a8f8f05,
  98        0xab90903b, 0xa8919139, 0xad92923f, 0xae93933d,
  99        0xa7949433, 0xa4959531, 0xa1969637, 0xa2979735,
 100        0xb398982b, 0xb0999929, 0xb59a9a2f, 0xb69b9b2d,
 101        0xbf9c9c23, 0xbc9d9d21, 0xb99e9e27, 0xba9f9f25,
 102        0xfba0a05b, 0xf8a1a159, 0xfda2a25f, 0xfea3a35d,
 103        0xf7a4a453, 0xf4a5a551, 0xf1a6a657, 0xf2a7a755,
 104        0xe3a8a84b, 0xe0a9a949, 0xe5aaaa4f, 0xe6abab4d,
 105        0xefacac43, 0xecadad41, 0xe9aeae47, 0xeaafaf45,
 106        0xcbb0b07b, 0xc8b1b179, 0xcdb2b27f, 0xceb3b37d,
 107        0xc7b4b473, 0xc4b5b571, 0xc1b6b677, 0xc2b7b775,
 108        0xd3b8b86b, 0xd0b9b969, 0xd5baba6f, 0xd6bbbb6d,
 109        0xdfbcbc63, 0xdcbdbd61, 0xd9bebe67, 0xdabfbf65,
 110        0x5bc0c09b, 0x58c1c199, 0x5dc2c29f, 0x5ec3c39d,
 111        0x57c4c493, 0x54c5c591, 0x51c6c697, 0x52c7c795,
 112        0x43c8c88b, 0x40c9c989, 0x45caca8f, 0x46cbcb8d,
 113        0x4fcccc83, 0x4ccdcd81, 0x49cece87, 0x4acfcf85,
 114        0x6bd0d0bb, 0x68d1d1b9, 0x6dd2d2bf, 0x6ed3d3bd,
 115        0x67d4d4b3, 0x64d5d5b1, 0x61d6d6b7, 0x62d7d7b5,
 116        0x73d8d8ab, 0x70d9d9a9, 0x75dadaaf, 0x76dbdbad,
 117        0x7fdcdca3, 0x7cdddda1, 0x79dedea7, 0x7adfdfa5,
 118        0x3be0e0db, 0x38e1e1d9, 0x3de2e2df, 0x3ee3e3dd,
 119        0x37e4e4d3, 0x34e5e5d1, 0x31e6e6d7, 0x32e7e7d5,
 120        0x23e8e8cb, 0x20e9e9c9, 0x25eaeacf, 0x26ebebcd,
 121        0x2fececc3, 0x2cededc1, 0x29eeeec7, 0x2aefefc5,
 122        0x0bf0f0fb, 0x08f1f1f9, 0x0df2f2ff, 0x0ef3f3fd,
 123        0x07f4f4f3, 0x04f5f5f1, 0x01f6f6f7, 0x02f7f7f5,
 124        0x13f8f8eb, 0x10f9f9e9, 0x15fafaef, 0x16fbfbed,
 125        0x1ffcfce3, 0x1cfdfde1, 0x19fefee7, 0x1affffe5,
 126    }, {
 127        /* Inverse MixColumns lookup table */
 128        0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
 129        0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
 130        0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
 131        0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
 132        0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
 133        0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
 134        0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
 135        0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
 136        0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
 137        0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
 138        0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
 139        0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
 140        0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
 141        0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
 142        0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
 143        0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
 144        0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
 145        0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
 146        0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
 147        0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
 148        0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
 149        0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
 150        0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
 151        0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
 152        0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
 153        0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
 154        0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
 155        0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
 156        0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
 157        0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
 158        0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
 159        0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
 160        0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
 161        0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
 162        0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
 163        0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
 164        0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
 165        0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
 166        0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
 167        0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
 168        0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
 169        0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
 170        0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
 171        0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
 172        0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
 173        0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
 174        0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
 175        0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
 176        0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
 177        0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
 178        0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
 179        0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
 180        0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
 181        0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
 182        0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
 183        0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
 184        0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
 185        0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
 186        0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
 187        0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
 188        0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
 189        0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
 190        0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
 191        0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
 192    } };
 193
 194    uint64_t *rd = vd;
 195    uint64_t *rm = vm;
 196    union CRYPTO_STATE st = { .l = { rm[0], rm[1] } };
 197    int i;
 198
 199    assert(decrypt < 2);
 200
 201    for (i = 0; i < 16; i += 4) {
 202        CR_ST_WORD(st, i >> 2) =
 203            mc[decrypt][CR_ST_BYTE(st, i)] ^
 204            rol32(mc[decrypt][CR_ST_BYTE(st, i + 1)], 8) ^
 205            rol32(mc[decrypt][CR_ST_BYTE(st, i + 2)], 16) ^
 206            rol32(mc[decrypt][CR_ST_BYTE(st, i + 3)], 24);
 207    }
 208
 209    rd[0] = st.l[0];
 210    rd[1] = st.l[1];
 211}
 212
 213/*
 214 * SHA-1 logical functions
 215 */
 216
 217static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
 218{
 219    return (x & (y ^ z)) ^ z;
 220}
 221
 222static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
 223{
 224    return x ^ y ^ z;
 225}
 226
 227static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
 228{
 229    return (x & y) | ((x | y) & z);
 230}
 231
 232void HELPER(crypto_sha1_3reg)(void *vd, void *vn, void *vm, uint32_t op)
 233{
 234    uint64_t *rd = vd;
 235    uint64_t *rn = vn;
 236    uint64_t *rm = vm;
 237    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 238    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 239    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 240
 241    if (op == 3) { /* sha1su0 */
 242        d.l[0] ^= d.l[1] ^ m.l[0];
 243        d.l[1] ^= n.l[0] ^ m.l[1];
 244    } else {
 245        int i;
 246
 247        for (i = 0; i < 4; i++) {
 248            uint32_t t;
 249
 250            switch (op) {
 251            case 0: /* sha1c */
 252                t = cho(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
 253                break;
 254            case 1: /* sha1p */
 255                t = par(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
 256                break;
 257            case 2: /* sha1m */
 258                t = maj(CR_ST_WORD(d, 1), CR_ST_WORD(d, 2), CR_ST_WORD(d, 3));
 259                break;
 260            default:
 261                g_assert_not_reached();
 262            }
 263            t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
 264                 + CR_ST_WORD(m, i);
 265
 266            CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
 267            CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
 268            CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
 269            CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
 270            CR_ST_WORD(d, 0) = t;
 271        }
 272    }
 273    rd[0] = d.l[0];
 274    rd[1] = d.l[1];
 275}
 276
 277void HELPER(crypto_sha1h)(void *vd, void *vm)
 278{
 279    uint64_t *rd = vd;
 280    uint64_t *rm = vm;
 281    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 282
 283    CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
 284    CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
 285
 286    rd[0] = m.l[0];
 287    rd[1] = m.l[1];
 288}
 289
 290void HELPER(crypto_sha1su1)(void *vd, void *vm)
 291{
 292    uint64_t *rd = vd;
 293    uint64_t *rm = vm;
 294    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 295    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 296
 297    CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
 298    CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
 299    CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
 300    CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
 301
 302    rd[0] = d.l[0];
 303    rd[1] = d.l[1];
 304}
 305
 306/*
 307 * The SHA-256 logical functions, according to
 308 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
 309 */
 310
 311static uint32_t S0(uint32_t x)
 312{
 313    return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
 314}
 315
 316static uint32_t S1(uint32_t x)
 317{
 318    return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
 319}
 320
 321static uint32_t s0(uint32_t x)
 322{
 323    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
 324}
 325
 326static uint32_t s1(uint32_t x)
 327{
 328    return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
 329}
 330
 331void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm)
 332{
 333    uint64_t *rd = vd;
 334    uint64_t *rn = vn;
 335    uint64_t *rm = vm;
 336    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 337    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 338    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 339    int i;
 340
 341    for (i = 0; i < 4; i++) {
 342        uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
 343                     + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
 344                     + CR_ST_WORD(m, i);
 345
 346        CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
 347        CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
 348        CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
 349        CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
 350
 351        t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
 352             + S0(CR_ST_WORD(d, 0));
 353
 354        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
 355        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
 356        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
 357        CR_ST_WORD(d, 0) = t;
 358    }
 359
 360    rd[0] = d.l[0];
 361    rd[1] = d.l[1];
 362}
 363
 364void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm)
 365{
 366    uint64_t *rd = vd;
 367    uint64_t *rn = vn;
 368    uint64_t *rm = vm;
 369    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 370    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 371    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 372    int i;
 373
 374    for (i = 0; i < 4; i++) {
 375        uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
 376                     + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
 377                     + CR_ST_WORD(m, i);
 378
 379        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
 380        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
 381        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
 382        CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
 383    }
 384
 385    rd[0] = d.l[0];
 386    rd[1] = d.l[1];
 387}
 388
 389void HELPER(crypto_sha256su0)(void *vd, void *vm)
 390{
 391    uint64_t *rd = vd;
 392    uint64_t *rm = vm;
 393    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 394    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 395
 396    CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
 397    CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
 398    CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
 399    CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
 400
 401    rd[0] = d.l[0];
 402    rd[1] = d.l[1];
 403}
 404
 405void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm)
 406{
 407    uint64_t *rd = vd;
 408    uint64_t *rn = vn;
 409    uint64_t *rm = vm;
 410    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 411    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 412    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 413
 414    CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
 415    CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
 416    CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
 417    CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
 418
 419    rd[0] = d.l[0];
 420    rd[1] = d.l[1];
 421}
 422
 423/*
 424 * The SHA-512 logical functions (same as above but using 64-bit operands)
 425 */
 426
 427static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
 428{
 429    return (x & (y ^ z)) ^ z;
 430}
 431
 432static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
 433{
 434    return (x & y) | ((x | y) & z);
 435}
 436
 437static uint64_t S0_512(uint64_t x)
 438{
 439    return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
 440}
 441
 442static uint64_t S1_512(uint64_t x)
 443{
 444    return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
 445}
 446
 447static uint64_t s0_512(uint64_t x)
 448{
 449    return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
 450}
 451
 452static uint64_t s1_512(uint64_t x)
 453{
 454    return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
 455}
 456
 457void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm)
 458{
 459    uint64_t *rd = vd;
 460    uint64_t *rn = vn;
 461    uint64_t *rm = vm;
 462    uint64_t d0 = rd[0];
 463    uint64_t d1 = rd[1];
 464
 465    d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
 466    d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
 467
 468    rd[0] = d0;
 469    rd[1] = d1;
 470}
 471
 472void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm)
 473{
 474    uint64_t *rd = vd;
 475    uint64_t *rn = vn;
 476    uint64_t *rm = vm;
 477    uint64_t d0 = rd[0];
 478    uint64_t d1 = rd[1];
 479
 480    d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
 481    d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
 482
 483    rd[0] = d0;
 484    rd[1] = d1;
 485}
 486
 487void HELPER(crypto_sha512su0)(void *vd, void *vn)
 488{
 489    uint64_t *rd = vd;
 490    uint64_t *rn = vn;
 491    uint64_t d0 = rd[0];
 492    uint64_t d1 = rd[1];
 493
 494    d0 += s0_512(rd[1]);
 495    d1 += s0_512(rn[0]);
 496
 497    rd[0] = d0;
 498    rd[1] = d1;
 499}
 500
 501void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm)
 502{
 503    uint64_t *rd = vd;
 504    uint64_t *rn = vn;
 505    uint64_t *rm = vm;
 506
 507    rd[0] += s1_512(rn[0]) + rm[0];
 508    rd[1] += s1_512(rn[1]) + rm[1];
 509}
 510
 511void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm)
 512{
 513    uint64_t *rd = vd;
 514    uint64_t *rn = vn;
 515    uint64_t *rm = vm;
 516    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 517    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 518    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 519    uint32_t t;
 520
 521    t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
 522    CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
 523
 524    t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
 525    CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
 526
 527    t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
 528    CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
 529
 530    t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
 531    CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
 532
 533    rd[0] = d.l[0];
 534    rd[1] = d.l[1];
 535}
 536
 537void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm)
 538{
 539    uint64_t *rd = vd;
 540    uint64_t *rn = vn;
 541    uint64_t *rm = vm;
 542    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 543    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 544    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 545    uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
 546
 547    CR_ST_WORD(d, 0) ^= t;
 548    CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
 549    CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
 550    CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
 551                        ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
 552
 553    rd[0] = d.l[0];
 554    rd[1] = d.l[1];
 555}
 556
 557void HELPER(crypto_sm3tt)(void *vd, void *vn, void *vm, uint32_t imm2,
 558                          uint32_t opcode)
 559{
 560    uint64_t *rd = vd;
 561    uint64_t *rn = vn;
 562    uint64_t *rm = vm;
 563    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 564    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 565    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 566    uint32_t t;
 567
 568    assert(imm2 < 4);
 569
 570    if (opcode == 0 || opcode == 2) {
 571        /* SM3TT1A, SM3TT2A */
 572        t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
 573    } else if (opcode == 1) {
 574        /* SM3TT1B */
 575        t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
 576    } else if (opcode == 3) {
 577        /* SM3TT2B */
 578        t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
 579    } else {
 580        g_assert_not_reached();
 581    }
 582
 583    t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
 584
 585    CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
 586
 587    if (opcode < 2) {
 588        /* SM3TT1A, SM3TT1B */
 589        t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
 590
 591        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
 592    } else {
 593        /* SM3TT2A, SM3TT2B */
 594        t += CR_ST_WORD(n, 3);
 595        t ^= rol32(t, 9) ^ rol32(t, 17);
 596
 597        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
 598    }
 599
 600    CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
 601    CR_ST_WORD(d, 3) = t;
 602
 603    rd[0] = d.l[0];
 604    rd[1] = d.l[1];
 605}
 606
 607static uint8_t const sm4_sbox[] = {
 608    0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
 609    0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
 610    0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
 611    0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
 612    0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
 613    0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
 614    0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
 615    0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
 616    0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
 617    0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
 618    0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
 619    0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
 620    0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
 621    0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
 622    0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
 623    0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
 624    0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
 625    0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
 626    0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
 627    0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
 628    0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
 629    0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
 630    0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
 631    0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
 632    0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
 633    0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
 634    0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
 635    0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
 636    0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
 637    0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
 638    0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
 639    0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48,
 640};
 641
 642void HELPER(crypto_sm4e)(void *vd, void *vn)
 643{
 644    uint64_t *rd = vd;
 645    uint64_t *rn = vn;
 646    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 647    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 648    uint32_t t, i;
 649
 650    for (i = 0; i < 4; i++) {
 651        t = CR_ST_WORD(d, (i + 1) % 4) ^
 652            CR_ST_WORD(d, (i + 2) % 4) ^
 653            CR_ST_WORD(d, (i + 3) % 4) ^
 654            CR_ST_WORD(n, i);
 655
 656        t = sm4_sbox[t & 0xff] |
 657            sm4_sbox[(t >> 8) & 0xff] << 8 |
 658            sm4_sbox[(t >> 16) & 0xff] << 16 |
 659            sm4_sbox[(t >> 24) & 0xff] << 24;
 660
 661        CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
 662                            rol32(t, 24);
 663    }
 664
 665    rd[0] = d.l[0];
 666    rd[1] = d.l[1];
 667}
 668
 669void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm)
 670{
 671    uint64_t *rd = vd;
 672    uint64_t *rn = vn;
 673    uint64_t *rm = vm;
 674    union CRYPTO_STATE d;
 675    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 676    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 677    uint32_t t, i;
 678
 679    d = n;
 680    for (i = 0; i < 4; i++) {
 681        t = CR_ST_WORD(d, (i + 1) % 4) ^
 682            CR_ST_WORD(d, (i + 2) % 4) ^
 683            CR_ST_WORD(d, (i + 3) % 4) ^
 684            CR_ST_WORD(m, i);
 685
 686        t = sm4_sbox[t & 0xff] |
 687            sm4_sbox[(t >> 8) & 0xff] << 8 |
 688            sm4_sbox[(t >> 16) & 0xff] << 16 |
 689            sm4_sbox[(t >> 24) & 0xff] << 24;
 690
 691        CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
 692    }
 693
 694    rd[0] = d.l[0];
 695    rd[1] = d.l[1];
 696}
 697