// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * sun4i-ss-hash.c - hardware cryptographic accelerator for Allwinner A20 SoC
 *
 * Copyright (C) 2013-2015 Corentin LABBE <clabbe.montjoie@gmail.com>
 *
 * This file adds support for MD5 and SHA1.
 *
 * The datasheet can be found via Documentation/arm/sunxi.rst
 */
#include "sun4i-ss.h"
#include <linux/scatterlist.h>

/* This is a totally arbitrary value; it only bounds the polling loops below */
#define SS_TIMEOUT 100

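/*
 * sun4i_hash_crainit() binds the tfm to its SS device and sizes the
 * per-request context. By its naming it is presumably installed as the
 * tfm init hook (->cra_init) by the alg template code in the core file;
 * that wiring is an assumption, since this file does not register it
 * itself.
 */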
int sun4i_hash_crainit(struct crypto_tfm *tfm)
{
	struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
	struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
	struct sun4i_ss_alg_template *algt;

	memset(op, 0, sizeof(struct sun4i_tfm_ctx));

	algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
	op->ss = algt->ss;

	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
				 sizeof(struct sun4i_req_ctx));
	return 0;
}

/* sun4i_hash_init: initialize request context */
int sun4i_hash_init(struct ahash_request *areq)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
	struct sun4i_ss_alg_template *algt;

	memset(op, 0, sizeof(struct sun4i_req_ctx));

	algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
	op->mode = algt->mode;

	return 0;
}

int sun4i_hash_export_md5(struct ahash_request *areq, void *out)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	struct md5_state *octx = out;
	int i;

	octx->byte_count = op->byte_count + op->len;

	memcpy(octx->block, op->buf, op->len);

	if (op->byte_count) {
		for (i = 0; i < 4; i++)
			octx->hash[i] = op->hash[i];
	} else {
		/*
		 * No data has been processed yet: export the standard MD5
		 * initial state. The first four SHA-1 constants happen to
		 * be numerically identical to MD5's A/B/C/D init values.
		 */
		octx->hash[0] = SHA1_H0;
		octx->hash[1] = SHA1_H1;
		octx->hash[2] = SHA1_H2;
		octx->hash[3] = SHA1_H3;
	}

	return 0;
}

int sun4i_hash_import_md5(struct ahash_request *areq, const void *in)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	const struct md5_state *ictx = in;
	int i;

	sun4i_hash_init(areq);

	op->byte_count = ictx->byte_count & ~0x3F;
	op->len = ictx->byte_count & 0x3F;

	memcpy(op->buf, ictx->block, op->len);

	for (i = 0; i < 4; i++)
		op->hash[i] = ictx->hash[i];

	return 0;
}

int sun4i_hash_export_sha1(struct ahash_request *areq, void *out)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	struct sha1_state *octx = out;
	int i;

	octx->count = op->byte_count + op->len;

	memcpy(octx->buffer, op->buf, op->len);

	if (op->byte_count) {
		for (i = 0; i < 5; i++)
			octx->state[i] = op->hash[i];
	} else {
		octx->state[0] = SHA1_H0;
		octx->state[1] = SHA1_H1;
		octx->state[2] = SHA1_H2;
		octx->state[3] = SHA1_H3;
		octx->state[4] = SHA1_H4;
	}

	return 0;
}

int sun4i_hash_import_sha1(struct ahash_request *areq, const void *in)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	const struct sha1_state *ictx = in;
	int i;

	sun4i_hash_init(areq);

	op->byte_count = ictx->count & ~0x3F;
	op->len = ictx->count & 0x3F;

	memcpy(op->buf, ictx->buffer, op->len);

	for (i = 0; i < 5; i++)
		op->hash[i] = ictx->state[i];

	return 0;
}
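
/*
 * The export/import helpers above implement the ahash partial-state
 * contract using the generic struct md5_state / struct sha1_state
 * layouts, so a partially hashed request exported here can, in
 * principle, be imported by any other implementation sharing the same
 * state format.
 */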

#define SS_HASH_UPDATE 1
#define SS_HASH_FINAL 2
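
/*
 * Flag combinations, as set by the entry points at the bottom of this
 * file:
 *   update()            -> SS_HASH_UPDATE
 *   final()             -> SS_HASH_FINAL
 *   finup(), digest()   -> SS_HASH_UPDATE | SS_HASH_FINAL
 */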

/*
 * sun4i_hash_update: update hash engine
 *
 * Could be used for both SHA1 and MD5
 * Data is written in 32-bit words and pushed into the SS.
 *
 * Since we cannot leave partial data and hash state in the engine,
 * we need to get the hash state at the end of this function.
 * The hash state can only be retrieved at 64-byte boundaries,
 * so the first job is to round the amount of data written to the SS
 * down so that the total processed is a multiple of 64 bytes; the
 * extra bytes go to the temporary buffer op->buf, which holds op->len
 * bytes.
 *
 * So at the beginning of update():
 * if op->len + areq->nbytes < 64
 * => all data is copied to the wait buffer (op->buf) and end=0
 * otherwise, write everything from op->buf to the device and set end
 * so that the total written completes a multiple of 64 bytes
 *
 * example 1:
 * update1 with 60 bytes => op->len=60
 * update2 with 60 bytes => one more word is needed to reach 64 bytes
 * end=4
 * so write all of op->buf plus one word from the SGs,
 * then store the remaining data in op->buf
 * final state: op->len=56
 */
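/*
 * example 2 (an illustration of the final-flag path, derived from the
 * code below rather than from the datasheet):
 * finup() with op->len=0 and areq->nbytes=70
 * => end = 68, since the final flag allows stopping at a multiple of 4;
 *    the last 2 bytes are buffered and consumed by the padding code in
 *    hash_final
 */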
static int sun4i_hash(struct ahash_request *areq)
{
	/*
	 * i is the total bytes read from the SGs, to be compared to
	 * areq->nbytes
	 * i is important because we cannot rely on the SG lengths: the sum
	 * of SG->length could be greater than areq->nbytes
	 *
	 * end is the position where we need to stop writing to the device,
	 * to be compared to i
	 *
	 * in_i: advancement in the current SG
	 */
	unsigned int i = 0, end, fill, min_fill, nwait, nbw = 0, j = 0, todo;
	unsigned int in_i = 0;
	u32 spaces, rx_cnt = SS_RX_DEFAULT, bf[32] = {0}, wb = 0, v, ivmode = 0;
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
	struct sun4i_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
	struct sun4i_ss_ctx *ss = tfmctx->ss;
	struct scatterlist *in_sg = areq->src;
	struct sg_mapping_iter mi;
	int in_r, err = 0;
	size_t copied = 0;

	dev_dbg(ss->dev, "%s %s bc=%llu len=%u mode=%x wl=%u h0=%0x",
		__func__, crypto_tfm_alg_name(areq->base.tfm),
		op->byte_count, areq->nbytes, op->mode,
		op->len, op->hash[0]);

	if (unlikely(!areq->nbytes) && !(op->flags & SS_HASH_FINAL))
		return 0;

	/* protect against overflow */
	if (unlikely(areq->nbytes > UINT_MAX - op->len)) {
		dev_err(ss->dev, "Cannot process too large request\n");
		return -EINVAL;
	}

	if (op->len + areq->nbytes < 64 && !(op->flags & SS_HASH_FINAL)) {
		/* linearize data into op->buf */
		copied = sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
					    op->buf + op->len, areq->nbytes, 0);
		op->len += copied;
		return 0;
	}
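
	/*
	 * From here on the engine itself is used, so accesses to it must be
	 * serialized with the per-device lock.
	 */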
	spin_lock_bh(&ss->slock);

	/*
	 * If some data has been processed before, restore the partial hash
	 * state into the IV registers.
	 */
	if (op->byte_count) {
		ivmode = SS_IV_ARBITRARY;
		for (i = 0; i < 5; i++)
			writel(op->hash[i], ss->base + SS_IV0 + i * 4);
	}
	/* Enable the device */
	writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL);

	if (!(op->flags & SS_HASH_UPDATE))
		goto hash_final;

	/* start of handling data */
	if (!(op->flags & SS_HASH_FINAL)) {
		end = ((areq->nbytes + op->len) / 64) * 64 - op->len;

		if (end > areq->nbytes || areq->nbytes - end > 63) {
			dev_err(ss->dev, "ERROR: Bound error %u %u\n",
				end, areq->nbytes);
			err = -EINVAL;
			goto release_ss;
		}
	} else {
		/* Since the final flag is set, we can stop at a multiple of 4 */
		if (areq->nbytes < 4)
			end = 0;
		else
			end = ((areq->nbytes + op->len) / 4) * 4 - op->len;
	}

	/*
	 * TODO: if SGlen % 4 and !op->len then DMA.
	 * DMA is not implemented yet; the loop below only detects and logs
	 * when it would be possible.
	 */
	i = 1;
	while (in_sg && i == 1) {
		if (in_sg->length % 4)
			i = 0;
		in_sg = sg_next(in_sg);
	}
	if (i == 1 && !op->len && areq->nbytes)
		dev_dbg(ss->dev, "We can DMA\n");

	i = 0;
	sg_miter_start(&mi, areq->src, sg_nents(areq->src),
		       SG_MITER_FROM_SG | SG_MITER_ATOMIC);
	sg_miter_next(&mi);
	in_i = 0;

	do {
		/*
		 * We need to linearize in two cases:
		 * - the buffer is already partially used
		 * - the SG has fewer than 4 bytes remaining
		 */
		if (op->len || (mi.length - in_i) < 4) {
			/*
			 * If we enter here, there are two reasons to stop:
			 * - the buffer is full
			 * - we reach the end
			 */
			while (op->len < 64 && i < end) {
				/* how many bytes we can read from the current SG */
				in_r = min3(mi.length - in_i, end - i,
					    64 - op->len);
				memcpy(op->buf + op->len, mi.addr + in_i, in_r);
				op->len += in_r;
				i += in_r;
				in_i += in_r;
				if (in_i == mi.length) {
					sg_miter_next(&mi);
					in_i = 0;
				}
			}
			if (op->len > 3 && !(op->len % 4)) {
				/* write buf to the device */
				writesl(ss->base + SS_RXFIFO, op->buf,
					op->len / 4);
				op->byte_count += op->len;
				op->len = 0;
			}
		}
		if (mi.length - in_i > 3 && i < end) {
			/* how many bytes we can read from the current SG */
			in_r = min3(mi.length - in_i, areq->nbytes - i,
				    ((mi.length - in_i) / 4) * 4);
			/* how many words we can write to the device */
			todo = min3((u32)(end - i) / 4, rx_cnt, (u32)in_r / 4);
			writesl(ss->base + SS_RXFIFO, mi.addr + in_i, todo);
			op->byte_count += todo * 4;
			i += todo * 4;
			in_i += todo * 4;
			rx_cnt -= todo;
			if (!rx_cnt) {
				/* refresh the free-space count of the RX FIFO */
				spaces = readl(ss->base + SS_FCSR);
				rx_cnt = SS_RXFIFO_SPACES(spaces);
			}
			if (in_i == mi.length) {
				sg_miter_next(&mi);
				in_i = 0;
			}
		}
	} while (i < end);

	/*
	 * Now we have written to the device all that we can;
	 * store the remaining bytes in op->buf
	 */
	if ((areq->nbytes - i) < 64) {
		while (i < areq->nbytes && in_i < mi.length && op->len < 64) {
			/* how many bytes we can read from the current SG */
			in_r = min3(mi.length - in_i, areq->nbytes - i,
				    64 - op->len);
			memcpy(op->buf + op->len, mi.addr + in_i, in_r);
			op->len += in_r;
			i += in_r;
			in_i += in_r;
			if (in_i == mi.length) {
				sg_miter_next(&mi);
				in_i = 0;
			}
		}
	}

	sg_miter_stop(&mi);

	/*
	 * End of data processing.
	 * If the final flag is set, go to the finalize part;
	 * if not, store the partial hash state.
	 */
	if (op->flags & SS_HASH_FINAL)
		goto hash_final;

	writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);
	i = 0;
	do {
		v = readl(ss->base + SS_CTL);
		i++;
	} while (i < SS_TIMEOUT && (v & SS_DATA_END));
	if (unlikely(i >= SS_TIMEOUT)) {
		dev_err_ratelimited(ss->dev,
				    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
				    i, SS_TIMEOUT, v, areq->nbytes);
		err = -EIO;
		goto release_ss;
	}

	/*
	 * The datasheet isn't very clear about when to retrieve the digest. The
	 * bit SS_DATA_END is cleared when the engine has processed the data and
	 * when the digest is computed *but* it doesn't mean the digest is
	 * available in the digest registers. Hence the delay to be sure we can
	 * read it.
	 */
	ndelay(1);

	for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
		op->hash[i] = readl(ss->base + SS_MD0 + i * 4);

	goto release_ss;
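
	/*
	 * The partial digest read back just above is what gets restored
	 * into the SS_IV registers (with SS_IV_ARBITRARY) at the start of
	 * the next call; this is how a request spanning several update()
	 * calls survives the engine being reprogrammed in between.
	 */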

/*
 * hash_final: finalize the hashing operation
 *
 * If some bytes remain, write them, then ask the SS to finalize the
 * hashing operation.
 *
 * The RX FIFO size is not checked in this function since it is 32 words
 * deep after each enabling of the engine and this function never writes
 * more than 32 words.
 * If we come from the update part, we cannot have more than
 * 3 remaining bytes to write and the SS is fast enough to not care.
 */

hash_final:

	/* write the remaining full words of the wait buffer */
	if (op->len) {
		nwait = op->len / 4;
		if (nwait) {
			writesl(ss->base + SS_RXFIFO, op->buf, nwait);
			op->byte_count += 4 * nwait;
		}

		/* keep the trailing (sub-word) bytes in wb, masked to nbw bytes */
		nbw = op->len - 4 * nwait;
		if (nbw) {
			wb = *(u32 *)(op->buf + nwait * 4);
			wb &= GENMASK((nbw * 8) - 1, 0);

			op->byte_count += nbw;
		}
	}

	/* append the final '1' bit (0x80 byte) right after the last data byte */
	wb |= ((1 << 7) << (nbw * 8));
	bf[j++] = wb;

	/*
	 * Compute how much padding is needed to complete the last 64-byte
	 * block: min_fill is the minimum room required, i.e. 8 bytes for
	 * the length field plus, when the 0x80 byte starts a fresh word
	 * (nbw == 0), 4 more bytes. The operations follow the usual
	 * MD5/SHA1 padding scheme.
	 */

	/* last block size */
	fill = 64 - (op->byte_count % 64);
	min_fill = 2 * sizeof(u32) + (nbw ? 0 : sizeof(u32));

	/* if the padding does not fit, jump to the next 64-byte block */
	if (fill < min_fill)
		fill += 64;

	j += (fill - min_fill) / sizeof(u32);
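
	/*
	 * Worked example (derived from the code above, not from the
	 * datasheet): for a 70-byte message, op->byte_count = 70 and
	 * nbw = 2, so fill = 58 and min_fill = 8, advancing j by 12 zero
	 * words. Together with wb and the two length words below, the
	 * engine receives 68 + 15 * 4 = 128 bytes: two full 64-byte blocks.
	 */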

	/*
	 * Write the message length in bits: big-endian for SHA1,
	 * little-endian for MD5.
	 */
	if (op->mode == SS_OP_SHA1) {
		__be64 bits = cpu_to_be64(op->byte_count << 3);

		bf[j++] = lower_32_bits(bits);
		bf[j++] = upper_32_bits(bits);
	} else {
		__le64 bits = cpu_to_le64(op->byte_count << 3);

		bf[j++] = lower_32_bits(bits);
		bf[j++] = upper_32_bits(bits);
	}
	writesl(ss->base + SS_RXFIFO, bf, j);

	/* Tell the SS to stop the hashing */
	writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);

	/*
	 * Wait for the SS to finish the hash.
	 * A timeout here can only happen in case of bad overclocking
	 * or a driver bug.
	 */
	i = 0;
	do {
		v = readl(ss->base + SS_CTL);
		i++;
	} while (i < SS_TIMEOUT && (v & SS_DATA_END));
	if (unlikely(i >= SS_TIMEOUT)) {
		dev_err_ratelimited(ss->dev,
				    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
				    i, SS_TIMEOUT, v, areq->nbytes);
		err = -EIO;
		goto release_ss;
	}

	/*
	 * The datasheet isn't very clear about when to retrieve the digest. The
	 * bit SS_DATA_END is cleared when the engine has processed the data and
	 * when the digest is computed *but* it doesn't mean the digest is
	 * available in the digest registers. Hence the delay to be sure we can
	 * read it.
	 */
	ndelay(1);

	/* Get the hash from the device */
	if (op->mode == SS_OP_SHA1) {
		/* SHA1 result words are stored big-endian */
		for (i = 0; i < 5; i++) {
			v = cpu_to_be32(readl(ss->base + SS_MD0 + i * 4));
			memcpy(areq->result + i * 4, &v, 4);
		}
	} else {
		for (i = 0; i < 4; i++) {
			v = readl(ss->base + SS_MD0 + i * 4);
			memcpy(areq->result + i * 4, &v, 4);
		}
	}

release_ss:
	writel(0, ss->base + SS_CTL);
	spin_unlock_bh(&ss->slock);
	return err;
}

int sun4i_hash_final(struct ahash_request *areq)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);

	op->flags = SS_HASH_FINAL;
	return sun4i_hash(areq);
}

int sun4i_hash_update(struct ahash_request *areq)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);

	op->flags = SS_HASH_UPDATE;
	return sun4i_hash(areq);
}

/* sun4i_hash_finup: finalize hashing operation after an update */
int sun4i_hash_finup(struct ahash_request *areq)
{
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);

	op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
	return sun4i_hash(areq);
}

/* combo of init/update/final functions */
int sun4i_hash_digest(struct ahash_request *areq)
{
	int err;
	struct sun4i_req_ctx *op = ahash_request_ctx(areq);

	err = sun4i_hash_init(areq);
	if (err)
		return err;

	op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
	return sun4i_hash(areq);
}
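
/*
 * Usage sketch (illustrative only, not part of this driver): callers
 * reach the handlers above through the generic ahash API, e.g.:
 *
 *	struct crypto_ahash *tfm = crypto_alloc_ahash("sha1", 0, 0);
 *	struct ahash_request *req = ahash_request_alloc(tfm, GFP_KERNEL);
 *
 *	ahash_request_set_callback(req, 0, NULL, NULL);
 *	ahash_request_set_crypt(req, sg, result, len);
 *	err = crypto_ahash_digest(req);
 *
 * If the crypto core selects this driver's "sha1" implementation, the
 * crypto_ahash_digest() call ends up in sun4i_hash_digest() above.
 */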