qemu/target/arm/tcg/crypto_helper.c
<<
>>
Prefs
   1/*
   2 * crypto_helper.c - emulate v8 Crypto Extensions instructions
   3 *
   4 * Copyright (C) 2013 - 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
   5 *
   6 * This library is free software; you can redistribute it and/or
   7 * modify it under the terms of the GNU Lesser General Public
   8 * License as published by the Free Software Foundation; either
   9 * version 2.1 of the License, or (at your option) any later version.
  10 */
  11
  12#include "qemu/osdep.h"
  13
  14#include "cpu.h"
  15#include "exec/helper-proto.h"
  16#include "tcg/tcg-gvec-desc.h"
  17#include "crypto/aes-round.h"
  18#include "crypto/sm4.h"
  19#include "vec_internal.h"
  20
/*
 * A 16-byte operand that can be viewed as sixteen bytes, four 32-bit
 * words or two 64-bit halves.  The helpers in this file fill and read
 * back the state through .l and address individual words through
 * CR_ST_WORD().
 */
union CRYPTO_STATE {
    uint8_t    bytes[16];
    uint32_t   words[4];
    uint64_t   l[2];
};
  26
/*
 * Accessors for byte / 32-bit word number i of a union CRYPTO_STATE.
 * On a little-endian host the index is used as is.  On a big-endian
 * host it is remapped ((15 - i) ^ 8 for bytes, (3 - i) ^ 2 for words);
 * presumably this makes element i refer to the same part of a state
 * that was loaded through .l on either kind of host -- confirm against
 * the register layout used by the callers.
 */
#if HOST_BIG_ENDIAN
#define CR_ST_BYTE(state, i)   ((state).bytes[(15 - (i)) ^ 8])
#define CR_ST_WORD(state, i)   ((state).words[(3 - (i)) ^ 2])
#else
#define CR_ST_BYTE(state, i)   ((state).bytes[i])
#define CR_ST_WORD(state, i)   ((state).words[i])
#endif
  34
  35/*
  36 * The caller has not been converted to full gvec, and so only
  37 * modifies the low 16 bytes of the vector register.
  38 */
  39static void clear_tail_16(void *vd, uint32_t desc)
  40{
  41    int opr_sz = simd_oprsz(desc);
  42    int max_sz = simd_maxsz(desc);
  43
  44    assert(opr_sz == 16);
  45    clear_tail(vd, opr_sz, max_sz);
  46}
  47
/*
 * Empty-initialized AES state.  The AESE/AESD helpers below pass it as
 * the round-key argument of the AES round functions, because they
 * apply the real round key themselves beforehand.
 */
static const AESState aes_zero = { };
  49
  50void HELPER(crypto_aese)(void *vd, void *vn, void *vm, uint32_t desc)
  51{
  52    intptr_t i, opr_sz = simd_oprsz(desc);
  53
  54    for (i = 0; i < opr_sz; i += 16) {
  55        AESState *ad = (AESState *)(vd + i);
  56        AESState *st = (AESState *)(vn + i);
  57        AESState *rk = (AESState *)(vm + i);
  58        AESState t;
  59
  60        /*
  61         * Our uint64_t are in the wrong order for big-endian.
  62         * The Arm AddRoundKey comes first, while the API AddRoundKey
  63         * comes last: perform the xor here, and provide zero to API.
  64         */
  65        if (HOST_BIG_ENDIAN) {
  66            t.d[0] = st->d[1] ^ rk->d[1];
  67            t.d[1] = st->d[0] ^ rk->d[0];
  68            aesenc_SB_SR_AK(&t, &t, &aes_zero, false);
  69            ad->d[0] = t.d[1];
  70            ad->d[1] = t.d[0];
  71        } else {
  72            t.v = st->v ^ rk->v;
  73            aesenc_SB_SR_AK(ad, &t, &aes_zero, false);
  74        }
  75    }
  76    clear_tail(vd, opr_sz, simd_maxsz(desc));
  77}
  78
  79void HELPER(crypto_aesd)(void *vd, void *vn, void *vm, uint32_t desc)
  80{
  81    intptr_t i, opr_sz = simd_oprsz(desc);
  82
  83    for (i = 0; i < opr_sz; i += 16) {
  84        AESState *ad = (AESState *)(vd + i);
  85        AESState *st = (AESState *)(vn + i);
  86        AESState *rk = (AESState *)(vm + i);
  87        AESState t;
  88
  89        /* Our uint64_t are in the wrong order for big-endian. */
  90        if (HOST_BIG_ENDIAN) {
  91            t.d[0] = st->d[1] ^ rk->d[1];
  92            t.d[1] = st->d[0] ^ rk->d[0];
  93            aesdec_ISB_ISR_AK(&t, &t, &aes_zero, false);
  94            ad->d[0] = t.d[1];
  95            ad->d[1] = t.d[0];
  96        } else {
  97            t.v = st->v ^ rk->v;
  98            aesdec_ISB_ISR_AK(ad, &t, &aes_zero, false);
  99        }
 100    }
 101    clear_tail(vd, opr_sz, simd_maxsz(desc));
 102}
 103
 104void HELPER(crypto_aesmc)(void *vd, void *vm, uint32_t desc)
 105{
 106    intptr_t i, opr_sz = simd_oprsz(desc);
 107
 108    for (i = 0; i < opr_sz; i += 16) {
 109        AESState *ad = (AESState *)(vd + i);
 110        AESState *st = (AESState *)(vm + i);
 111        AESState t;
 112
 113        /* Our uint64_t are in the wrong order for big-endian. */
 114        if (HOST_BIG_ENDIAN) {
 115            t.d[0] = st->d[1];
 116            t.d[1] = st->d[0];
 117            aesenc_MC(&t, &t, false);
 118            ad->d[0] = t.d[1];
 119            ad->d[1] = t.d[0];
 120        } else {
 121            aesenc_MC(ad, st, false);
 122        }
 123    }
 124    clear_tail(vd, opr_sz, simd_maxsz(desc));
 125}
 126
 127void HELPER(crypto_aesimc)(void *vd, void *vm, uint32_t desc)
 128{
 129    intptr_t i, opr_sz = simd_oprsz(desc);
 130
 131    for (i = 0; i < opr_sz; i += 16) {
 132        AESState *ad = (AESState *)(vd + i);
 133        AESState *st = (AESState *)(vm + i);
 134        AESState t;
 135
 136        /* Our uint64_t are in the wrong order for big-endian. */
 137        if (HOST_BIG_ENDIAN) {
 138            t.d[0] = st->d[1];
 139            t.d[1] = st->d[0];
 140            aesdec_IMC(&t, &t, false);
 141            ad->d[0] = t.d[1];
 142            ad->d[1] = t.d[0];
 143        } else {
 144            aesdec_IMC(ad, st, false);
 145        }
 146    }
 147    clear_tail(vd, opr_sz, simd_maxsz(desc));
 148}
 149
 150/*
 151 * SHA-1 logical functions
 152 */
 153
 154static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
 155{
 156    return (x & (y ^ z)) ^ z;
 157}
 158
 159static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
 160{
 161    return x ^ y ^ z;
 162}
 163
 164static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
 165{
 166    return (x & y) | ((x | y) & z);
 167}
 168
 169void HELPER(crypto_sha1su0)(void *vd, void *vn, void *vm, uint32_t desc)
 170{
 171    uint64_t *d = vd, *n = vn, *m = vm;
 172    uint64_t d0, d1;
 173
 174    d0 = d[1] ^ d[0] ^ m[0];
 175    d1 = n[0] ^ d[1] ^ m[1];
 176    d[0] = d0;
 177    d[1] = d1;
 178
 179    clear_tail_16(vd, desc);
 180}
 181
 182static inline void crypto_sha1_3reg(uint64_t *rd, uint64_t *rn,
 183                                    uint64_t *rm, uint32_t desc,
 184                                    uint32_t (*fn)(union CRYPTO_STATE *d))
 185{
 186    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 187    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 188    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 189    int i;
 190
 191    for (i = 0; i < 4; i++) {
 192        uint32_t t = fn(&d);
 193
 194        t += rol32(CR_ST_WORD(d, 0), 5) + CR_ST_WORD(n, 0)
 195             + CR_ST_WORD(m, i);
 196
 197        CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3);
 198        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
 199        CR_ST_WORD(d, 2) = ror32(CR_ST_WORD(d, 1), 2);
 200        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
 201        CR_ST_WORD(d, 0) = t;
 202    }
 203    rd[0] = d.l[0];
 204    rd[1] = d.l[1];
 205
 206    clear_tail_16(rd, desc);
 207}
 208
 209static uint32_t do_sha1c(union CRYPTO_STATE *d)
 210{
 211    return cho(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
 212}
 213
 214void HELPER(crypto_sha1c)(void *vd, void *vn, void *vm, uint32_t desc)
 215{
 216    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1c);
 217}
 218
 219static uint32_t do_sha1p(union CRYPTO_STATE *d)
 220{
 221    return par(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
 222}
 223
 224void HELPER(crypto_sha1p)(void *vd, void *vn, void *vm, uint32_t desc)
 225{
 226    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1p);
 227}
 228
 229static uint32_t do_sha1m(union CRYPTO_STATE *d)
 230{
 231    return maj(CR_ST_WORD(*d, 1), CR_ST_WORD(*d, 2), CR_ST_WORD(*d, 3));
 232}
 233
 234void HELPER(crypto_sha1m)(void *vd, void *vn, void *vm, uint32_t desc)
 235{
 236    crypto_sha1_3reg(vd, vn, vm, desc, do_sha1m);
 237}
 238
 239void HELPER(crypto_sha1h)(void *vd, void *vm, uint32_t desc)
 240{
 241    uint64_t *rd = vd;
 242    uint64_t *rm = vm;
 243    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 244
 245    CR_ST_WORD(m, 0) = ror32(CR_ST_WORD(m, 0), 2);
 246    CR_ST_WORD(m, 1) = CR_ST_WORD(m, 2) = CR_ST_WORD(m, 3) = 0;
 247
 248    rd[0] = m.l[0];
 249    rd[1] = m.l[1];
 250
 251    clear_tail_16(vd, desc);
 252}
 253
 254void HELPER(crypto_sha1su1)(void *vd, void *vm, uint32_t desc)
 255{
 256    uint64_t *rd = vd;
 257    uint64_t *rm = vm;
 258    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 259    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 260
 261    CR_ST_WORD(d, 0) = rol32(CR_ST_WORD(d, 0) ^ CR_ST_WORD(m, 1), 1);
 262    CR_ST_WORD(d, 1) = rol32(CR_ST_WORD(d, 1) ^ CR_ST_WORD(m, 2), 1);
 263    CR_ST_WORD(d, 2) = rol32(CR_ST_WORD(d, 2) ^ CR_ST_WORD(m, 3), 1);
 264    CR_ST_WORD(d, 3) = rol32(CR_ST_WORD(d, 3) ^ CR_ST_WORD(d, 0), 1);
 265
 266    rd[0] = d.l[0];
 267    rd[1] = d.l[1];
 268
 269    clear_tail_16(vd, desc);
 270}
 271
 272/*
 273 * The SHA-256 logical functions, according to
 274 * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
 275 */
 276
 277static uint32_t S0(uint32_t x)
 278{
 279    return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
 280}
 281
 282static uint32_t S1(uint32_t x)
 283{
 284    return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
 285}
 286
 287static uint32_t s0(uint32_t x)
 288{
 289    return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
 290}
 291
 292static uint32_t s1(uint32_t x)
 293{
 294    return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
 295}
 296
 297void HELPER(crypto_sha256h)(void *vd, void *vn, void *vm, uint32_t desc)
 298{
 299    uint64_t *rd = vd;
 300    uint64_t *rn = vn;
 301    uint64_t *rm = vm;
 302    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 303    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 304    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 305    int i;
 306
 307    for (i = 0; i < 4; i++) {
 308        uint32_t t = cho(CR_ST_WORD(n, 0), CR_ST_WORD(n, 1), CR_ST_WORD(n, 2))
 309                     + CR_ST_WORD(n, 3) + S1(CR_ST_WORD(n, 0))
 310                     + CR_ST_WORD(m, i);
 311
 312        CR_ST_WORD(n, 3) = CR_ST_WORD(n, 2);
 313        CR_ST_WORD(n, 2) = CR_ST_WORD(n, 1);
 314        CR_ST_WORD(n, 1) = CR_ST_WORD(n, 0);
 315        CR_ST_WORD(n, 0) = CR_ST_WORD(d, 3) + t;
 316
 317        t += maj(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
 318             + S0(CR_ST_WORD(d, 0));
 319
 320        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
 321        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
 322        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
 323        CR_ST_WORD(d, 0) = t;
 324    }
 325
 326    rd[0] = d.l[0];
 327    rd[1] = d.l[1];
 328
 329    clear_tail_16(vd, desc);
 330}
 331
 332void HELPER(crypto_sha256h2)(void *vd, void *vn, void *vm, uint32_t desc)
 333{
 334    uint64_t *rd = vd;
 335    uint64_t *rn = vn;
 336    uint64_t *rm = vm;
 337    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 338    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 339    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 340    int i;
 341
 342    for (i = 0; i < 4; i++) {
 343        uint32_t t = cho(CR_ST_WORD(d, 0), CR_ST_WORD(d, 1), CR_ST_WORD(d, 2))
 344                     + CR_ST_WORD(d, 3) + S1(CR_ST_WORD(d, 0))
 345                     + CR_ST_WORD(m, i);
 346
 347        CR_ST_WORD(d, 3) = CR_ST_WORD(d, 2);
 348        CR_ST_WORD(d, 2) = CR_ST_WORD(d, 1);
 349        CR_ST_WORD(d, 1) = CR_ST_WORD(d, 0);
 350        CR_ST_WORD(d, 0) = CR_ST_WORD(n, 3 - i) + t;
 351    }
 352
 353    rd[0] = d.l[0];
 354    rd[1] = d.l[1];
 355
 356    clear_tail_16(vd, desc);
 357}
 358
 359void HELPER(crypto_sha256su0)(void *vd, void *vm, uint32_t desc)
 360{
 361    uint64_t *rd = vd;
 362    uint64_t *rm = vm;
 363    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 364    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 365
 366    CR_ST_WORD(d, 0) += s0(CR_ST_WORD(d, 1));
 367    CR_ST_WORD(d, 1) += s0(CR_ST_WORD(d, 2));
 368    CR_ST_WORD(d, 2) += s0(CR_ST_WORD(d, 3));
 369    CR_ST_WORD(d, 3) += s0(CR_ST_WORD(m, 0));
 370
 371    rd[0] = d.l[0];
 372    rd[1] = d.l[1];
 373
 374    clear_tail_16(vd, desc);
 375}
 376
 377void HELPER(crypto_sha256su1)(void *vd, void *vn, void *vm, uint32_t desc)
 378{
 379    uint64_t *rd = vd;
 380    uint64_t *rn = vn;
 381    uint64_t *rm = vm;
 382    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 383    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 384    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 385
 386    CR_ST_WORD(d, 0) += s1(CR_ST_WORD(m, 2)) + CR_ST_WORD(n, 1);
 387    CR_ST_WORD(d, 1) += s1(CR_ST_WORD(m, 3)) + CR_ST_WORD(n, 2);
 388    CR_ST_WORD(d, 2) += s1(CR_ST_WORD(d, 0)) + CR_ST_WORD(n, 3);
 389    CR_ST_WORD(d, 3) += s1(CR_ST_WORD(d, 1)) + CR_ST_WORD(m, 0);
 390
 391    rd[0] = d.l[0];
 392    rd[1] = d.l[1];
 393
 394    clear_tail_16(vd, desc);
 395}
 396
 397/*
 398 * The SHA-512 logical functions (same as above but using 64-bit operands)
 399 */
 400
 401static uint64_t cho512(uint64_t x, uint64_t y, uint64_t z)
 402{
 403    return (x & (y ^ z)) ^ z;
 404}
 405
 406static uint64_t maj512(uint64_t x, uint64_t y, uint64_t z)
 407{
 408    return (x & y) | ((x | y) & z);
 409}
 410
 411static uint64_t S0_512(uint64_t x)
 412{
 413    return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39);
 414}
 415
 416static uint64_t S1_512(uint64_t x)
 417{
 418    return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41);
 419}
 420
 421static uint64_t s0_512(uint64_t x)
 422{
 423    return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7);
 424}
 425
 426static uint64_t s1_512(uint64_t x)
 427{
 428    return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6);
 429}
 430
 431void HELPER(crypto_sha512h)(void *vd, void *vn, void *vm, uint32_t desc)
 432{
 433    uint64_t *rd = vd;
 434    uint64_t *rn = vn;
 435    uint64_t *rm = vm;
 436    uint64_t d0 = rd[0];
 437    uint64_t d1 = rd[1];
 438
 439    d1 += S1_512(rm[1]) + cho512(rm[1], rn[0], rn[1]);
 440    d0 += S1_512(d1 + rm[0]) + cho512(d1 + rm[0], rm[1], rn[0]);
 441
 442    rd[0] = d0;
 443    rd[1] = d1;
 444
 445    clear_tail_16(vd, desc);
 446}
 447
 448void HELPER(crypto_sha512h2)(void *vd, void *vn, void *vm, uint32_t desc)
 449{
 450    uint64_t *rd = vd;
 451    uint64_t *rn = vn;
 452    uint64_t *rm = vm;
 453    uint64_t d0 = rd[0];
 454    uint64_t d1 = rd[1];
 455
 456    d1 += S0_512(rm[0]) + maj512(rn[0], rm[1], rm[0]);
 457    d0 += S0_512(d1) + maj512(d1, rm[0], rm[1]);
 458
 459    rd[0] = d0;
 460    rd[1] = d1;
 461
 462    clear_tail_16(vd, desc);
 463}
 464
 465void HELPER(crypto_sha512su0)(void *vd, void *vn, uint32_t desc)
 466{
 467    uint64_t *rd = vd;
 468    uint64_t *rn = vn;
 469    uint64_t d0 = rd[0];
 470    uint64_t d1 = rd[1];
 471
 472    d0 += s0_512(rd[1]);
 473    d1 += s0_512(rn[0]);
 474
 475    rd[0] = d0;
 476    rd[1] = d1;
 477
 478    clear_tail_16(vd, desc);
 479}
 480
 481void HELPER(crypto_sha512su1)(void *vd, void *vn, void *vm, uint32_t desc)
 482{
 483    uint64_t *rd = vd;
 484    uint64_t *rn = vn;
 485    uint64_t *rm = vm;
 486
 487    rd[0] += s1_512(rn[0]) + rm[0];
 488    rd[1] += s1_512(rn[1]) + rm[1];
 489
 490    clear_tail_16(vd, desc);
 491}
 492
 493void HELPER(crypto_sm3partw1)(void *vd, void *vn, void *vm, uint32_t desc)
 494{
 495    uint64_t *rd = vd;
 496    uint64_t *rn = vn;
 497    uint64_t *rm = vm;
 498    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 499    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 500    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 501    uint32_t t;
 502
 503    t = CR_ST_WORD(d, 0) ^ CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 1), 17);
 504    CR_ST_WORD(d, 0) = t ^ ror32(t, 17) ^ ror32(t, 9);
 505
 506    t = CR_ST_WORD(d, 1) ^ CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 2), 17);
 507    CR_ST_WORD(d, 1) = t ^ ror32(t, 17) ^ ror32(t, 9);
 508
 509    t = CR_ST_WORD(d, 2) ^ CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 3), 17);
 510    CR_ST_WORD(d, 2) = t ^ ror32(t, 17) ^ ror32(t, 9);
 511
 512    t = CR_ST_WORD(d, 3) ^ CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 0), 17);
 513    CR_ST_WORD(d, 3) = t ^ ror32(t, 17) ^ ror32(t, 9);
 514
 515    rd[0] = d.l[0];
 516    rd[1] = d.l[1];
 517
 518    clear_tail_16(vd, desc);
 519}
 520
 521void HELPER(crypto_sm3partw2)(void *vd, void *vn, void *vm, uint32_t desc)
 522{
 523    uint64_t *rd = vd;
 524    uint64_t *rn = vn;
 525    uint64_t *rm = vm;
 526    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 527    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 528    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 529    uint32_t t = CR_ST_WORD(n, 0) ^ ror32(CR_ST_WORD(m, 0), 25);
 530
 531    CR_ST_WORD(d, 0) ^= t;
 532    CR_ST_WORD(d, 1) ^= CR_ST_WORD(n, 1) ^ ror32(CR_ST_WORD(m, 1), 25);
 533    CR_ST_WORD(d, 2) ^= CR_ST_WORD(n, 2) ^ ror32(CR_ST_WORD(m, 2), 25);
 534    CR_ST_WORD(d, 3) ^= CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(m, 3), 25) ^
 535                        ror32(t, 17) ^ ror32(t, 2) ^ ror32(t, 26);
 536
 537    rd[0] = d.l[0];
 538    rd[1] = d.l[1];
 539
 540    clear_tail_16(vd, desc);
 541}
 542
 543static inline void QEMU_ALWAYS_INLINE
 544crypto_sm3tt(uint64_t *rd, uint64_t *rn, uint64_t *rm,
 545             uint32_t desc, uint32_t opcode)
 546{
 547    union CRYPTO_STATE d = { .l = { rd[0], rd[1] } };
 548    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 549    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 550    uint32_t imm2 = simd_data(desc);
 551    uint32_t t;
 552
 553    assert(imm2 < 4);
 554
 555    if (opcode == 0 || opcode == 2) {
 556        /* SM3TT1A, SM3TT2A */
 557        t = par(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
 558    } else if (opcode == 1) {
 559        /* SM3TT1B */
 560        t = maj(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
 561    } else if (opcode == 3) {
 562        /* SM3TT2B */
 563        t = cho(CR_ST_WORD(d, 3), CR_ST_WORD(d, 2), CR_ST_WORD(d, 1));
 564    } else {
 565        qemu_build_not_reached();
 566    }
 567
 568    t += CR_ST_WORD(d, 0) + CR_ST_WORD(m, imm2);
 569
 570    CR_ST_WORD(d, 0) = CR_ST_WORD(d, 1);
 571
 572    if (opcode < 2) {
 573        /* SM3TT1A, SM3TT1B */
 574        t += CR_ST_WORD(n, 3) ^ ror32(CR_ST_WORD(d, 3), 20);
 575
 576        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 23);
 577    } else {
 578        /* SM3TT2A, SM3TT2B */
 579        t += CR_ST_WORD(n, 3);
 580        t ^= rol32(t, 9) ^ rol32(t, 17);
 581
 582        CR_ST_WORD(d, 1) = ror32(CR_ST_WORD(d, 2), 13);
 583    }
 584
 585    CR_ST_WORD(d, 2) = CR_ST_WORD(d, 3);
 586    CR_ST_WORD(d, 3) = t;
 587
 588    rd[0] = d.l[0];
 589    rd[1] = d.l[1];
 590
 591    clear_tail_16(rd, desc);
 592}
 593
/*
 * Stamp out one helper per SM3TT variant.  Each one forwards to
 * crypto_sm3tt() with a constant OPCODE (0..3, matching the
 * SM3TT1A/1B/2A/2B cases handled there).
 */
#define DO_SM3TT(NAME, OPCODE) \
    void HELPER(NAME)(void *vd, void *vn, void *vm, uint32_t desc) \
    { crypto_sm3tt(vd, vn, vm, desc, OPCODE); }

DO_SM3TT(crypto_sm3tt1a, 0)
DO_SM3TT(crypto_sm3tt1b, 1)
DO_SM3TT(crypto_sm3tt2a, 2)
DO_SM3TT(crypto_sm3tt2b, 3)

#undef DO_SM3TT
 604
 605static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm)
 606{
 607    union CRYPTO_STATE d = { .l = { rn[0], rn[1] } };
 608    union CRYPTO_STATE n = { .l = { rm[0], rm[1] } };
 609    uint32_t t, i;
 610
 611    for (i = 0; i < 4; i++) {
 612        t = CR_ST_WORD(d, (i + 1) % 4) ^
 613            CR_ST_WORD(d, (i + 2) % 4) ^
 614            CR_ST_WORD(d, (i + 3) % 4) ^
 615            CR_ST_WORD(n, i);
 616
 617        t = sm4_sbox[t & 0xff] |
 618            sm4_sbox[(t >> 8) & 0xff] << 8 |
 619            sm4_sbox[(t >> 16) & 0xff] << 16 |
 620            sm4_sbox[(t >> 24) & 0xff] << 24;
 621
 622        CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^
 623                            rol32(t, 24);
 624    }
 625
 626    rd[0] = d.l[0];
 627    rd[1] = d.l[1];
 628}
 629
 630void HELPER(crypto_sm4e)(void *vd, void *vn, void *vm, uint32_t desc)
 631{
 632    intptr_t i, opr_sz = simd_oprsz(desc);
 633
 634    for (i = 0; i < opr_sz; i += 16) {
 635        do_crypto_sm4e(vd + i, vn + i, vm + i);
 636    }
 637    clear_tail(vd, opr_sz, simd_maxsz(desc));
 638}
 639
 640static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm)
 641{
 642    union CRYPTO_STATE d;
 643    union CRYPTO_STATE n = { .l = { rn[0], rn[1] } };
 644    union CRYPTO_STATE m = { .l = { rm[0], rm[1] } };
 645    uint32_t t, i;
 646
 647    d = n;
 648    for (i = 0; i < 4; i++) {
 649        t = CR_ST_WORD(d, (i + 1) % 4) ^
 650            CR_ST_WORD(d, (i + 2) % 4) ^
 651            CR_ST_WORD(d, (i + 3) % 4) ^
 652            CR_ST_WORD(m, i);
 653
 654        t = sm4_sbox[t & 0xff] |
 655            sm4_sbox[(t >> 8) & 0xff] << 8 |
 656            sm4_sbox[(t >> 16) & 0xff] << 16 |
 657            sm4_sbox[(t >> 24) & 0xff] << 24;
 658
 659        CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23);
 660    }
 661
 662    rd[0] = d.l[0];
 663    rd[1] = d.l[1];
 664}
 665
 666void HELPER(crypto_sm4ekey)(void *vd, void *vn, void* vm, uint32_t desc)
 667{
 668    intptr_t i, opr_sz = simd_oprsz(desc);
 669
 670    for (i = 0; i < opr_sz; i += 16) {
 671        do_crypto_sm4ekey(vd + i, vn + i, vm + i);
 672    }
 673    clear_tail(vd, opr_sz, simd_maxsz(desc));
 674}
 675
 676void HELPER(crypto_rax1)(void *vd, void *vn, void *vm, uint32_t desc)
 677{
 678    intptr_t i, opr_sz = simd_oprsz(desc);
 679    uint64_t *d = vd, *n = vn, *m = vm;
 680
 681    for (i = 0; i < opr_sz / 8; ++i) {
 682        d[i] = n[i] ^ rol64(m[i], 1);
 683    }
 684    clear_tail(vd, opr_sz, simd_maxsz(desc));
 685}
 686