/* linux/drivers/crypto/sunxi-ss/sun4i-ss-hash.c */
   1/*
   2 * sun4i-ss-hash.c - hardware cryptographic accelerator for Allwinner A20 SoC
   3 *
   4 * Copyright (C) 2013-2015 Corentin LABBE <clabbe.montjoie@gmail.com>
   5 *
   6 * This file add support for MD5 and SHA1.
   7 *
   8 * You could find the datasheet in Documentation/arm/sunxi/README
   9 *
  10 * This program is free software; you can redistribute it and/or modify
  11 * it under the terms of the GNU General Public License as published by
  12 * the Free Software Foundation; either version 2 of the License, or
  13 * (at your option) any later version.
  14 */
  15#include "sun4i-ss.h"
  16#include <linux/scatterlist.h>
  17
  18/* This is a totally arbitrary value */
  19#define SS_TIMEOUT 100
  20
  21int sun4i_hash_crainit(struct crypto_tfm *tfm)
  22{
  23        struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
  24        struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
  25        struct sun4i_ss_alg_template *algt;
  26
  27        memset(op, 0, sizeof(struct sun4i_tfm_ctx));
  28
  29        algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
  30        op->ss = algt->ss;
  31
  32        crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
  33                                 sizeof(struct sun4i_req_ctx));
  34        return 0;
  35}
  36
  37/* sun4i_hash_init: initialize request context */
  38int sun4i_hash_init(struct ahash_request *areq)
  39{
  40        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  41        struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
  42        struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
  43        struct sun4i_ss_alg_template *algt;
  44
  45        memset(op, 0, sizeof(struct sun4i_req_ctx));
  46
  47        algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
  48        op->mode = algt->mode;
  49
  50        return 0;
  51}
  52
  53int sun4i_hash_export_md5(struct ahash_request *areq, void *out)
  54{
  55        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  56        struct md5_state *octx = out;
  57        int i;
  58
  59        octx->byte_count = op->byte_count + op->len;
  60
  61        memcpy(octx->block, op->buf, op->len);
  62
  63        if (op->byte_count) {
  64                for (i = 0; i < 4; i++)
  65                        octx->hash[i] = op->hash[i];
  66        } else {
  67                octx->hash[0] = SHA1_H0;
  68                octx->hash[1] = SHA1_H1;
  69                octx->hash[2] = SHA1_H2;
  70                octx->hash[3] = SHA1_H3;
  71        }
  72
  73        return 0;
  74}
  75
  76int sun4i_hash_import_md5(struct ahash_request *areq, const void *in)
  77{
  78        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  79        const struct md5_state *ictx = in;
  80        int i;
  81
  82        sun4i_hash_init(areq);
  83
  84        op->byte_count = ictx->byte_count & ~0x3F;
  85        op->len = ictx->byte_count & 0x3F;
  86
  87        memcpy(op->buf, ictx->block, op->len);
  88
  89        for (i = 0; i < 4; i++)
  90                op->hash[i] = ictx->hash[i];
  91
  92        return 0;
  93}
  94
  95int sun4i_hash_export_sha1(struct ahash_request *areq, void *out)
  96{
  97        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  98        struct sha1_state *octx = out;
  99        int i;
 100
 101        octx->count = op->byte_count + op->len;
 102
 103        memcpy(octx->buffer, op->buf, op->len);
 104
 105        if (op->byte_count) {
 106                for (i = 0; i < 5; i++)
 107                        octx->state[i] = op->hash[i];
 108        } else {
 109                octx->state[0] = SHA1_H0;
 110                octx->state[1] = SHA1_H1;
 111                octx->state[2] = SHA1_H2;
 112                octx->state[3] = SHA1_H3;
 113                octx->state[4] = SHA1_H4;
 114        }
 115
 116        return 0;
 117}
 118
 119int sun4i_hash_import_sha1(struct ahash_request *areq, const void *in)
 120{
 121        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 122        const struct sha1_state *ictx = in;
 123        int i;
 124
 125        sun4i_hash_init(areq);
 126
 127        op->byte_count = ictx->count & ~0x3F;
 128        op->len = ictx->count & 0x3F;
 129
 130        memcpy(op->buf, ictx->buffer, op->len);
 131
 132        for (i = 0; i < 5; i++)
 133                op->hash[i] = ictx->state[i];
 134
 135        return 0;
 136}
 137
 138#define SS_HASH_UPDATE 1
 139#define SS_HASH_FINAL 2
 140
 141/*
 142 * sun4i_hash_update: update hash engine
 143 *
 144 * Could be used for both SHA1 and MD5
 145 * Write data by step of 32bits and put then in the SS.
 146 *
 147 * Since we cannot leave partial data and hash state in the engine,
 148 * we need to get the hash state at the end of this function.
 149 * We can get the hash state every 64 bytes
 150 *
 151 * So the first work is to get the number of bytes to write to SS modulo 64
 152 * The extra bytes will go to a temporary buffer op->buf storing op->len bytes
 153 *
 154 * So at the begin of update()
 155 * if op->len + areq->nbytes < 64
 156 * => all data will be written to wait buffer (op->buf) and end=0
 157 * if not, write all data from op->buf to the device and position end to
 158 * complete to 64bytes
 159 *
 160 * example 1:
 161 * update1 60o => op->len=60
 162 * update2 60o => need one more word to have 64 bytes
 163 * end=4
 164 * so write all data from op->buf and one word of SGs
 165 * write remaining data in op->buf
 166 * final state op->len=56
 167 */
 168static int sun4i_hash(struct ahash_request *areq)
 169{
 170        /*
 171         * i is the total bytes read from SGs, to be compared to areq->nbytes
 172         * i is important because we cannot rely on SG length since the sum of
 173         * SG->length could be greater than areq->nbytes
 174         *
 175         * end is the position when we need to stop writing to the device,
 176         * to be compared to i
 177         *
 178         * in_i: advancement in the current SG
 179         */
 180        unsigned int i = 0, end, fill, min_fill, nwait, nbw = 0, j = 0, todo;
 181        unsigned int in_i = 0;
 182        u32 spaces, rx_cnt = SS_RX_DEFAULT, bf[32] = {0}, wb = 0, v, ivmode = 0;
 183        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 184        struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 185        struct sun4i_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
 186        struct sun4i_ss_ctx *ss = tfmctx->ss;
 187        struct scatterlist *in_sg = areq->src;
 188        struct sg_mapping_iter mi;
 189        int in_r, err = 0;
 190        size_t copied = 0;
 191
 192        dev_dbg(ss->dev, "%s %s bc=%llu len=%u mode=%x wl=%u h0=%0x",
 193                __func__, crypto_tfm_alg_name(areq->base.tfm),
 194                op->byte_count, areq->nbytes, op->mode,
 195                op->len, op->hash[0]);
 196
 197        if (unlikely(!areq->nbytes) && !(op->flags & SS_HASH_FINAL))
 198                return 0;
 199
 200        /* protect against overflow */
 201        if (unlikely(areq->nbytes > UINT_MAX - op->len)) {
 202                dev_err(ss->dev, "Cannot process too large request\n");
 203                return -EINVAL;
 204        }
 205
 206        if (op->len + areq->nbytes < 64 && !(op->flags & SS_HASH_FINAL)) {
 207                /* linearize data to op->buf */
 208                copied = sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
 209                                            op->buf + op->len, areq->nbytes, 0);
 210                op->len += copied;
 211                return 0;
 212        }
 213
 214        spin_lock_bh(&ss->slock);
 215
 216        /*
 217         * if some data have been processed before,
 218         * we need to restore the partial hash state
 219         */
 220        if (op->byte_count) {
 221                ivmode = SS_IV_ARBITRARY;
 222                for (i = 0; i < 5; i++)
 223                        writel(op->hash[i], ss->base + SS_IV0 + i * 4);
 224        }
 225        /* Enable the device */
 226        writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL);
 227
 228        if (!(op->flags & SS_HASH_UPDATE))
 229                goto hash_final;
 230
 231        /* start of handling data */
 232        if (!(op->flags & SS_HASH_FINAL)) {
 233                end = ((areq->nbytes + op->len) / 64) * 64 - op->len;
 234
 235                if (end > areq->nbytes || areq->nbytes - end > 63) {
 236                        dev_err(ss->dev, "ERROR: Bound error %u %u\n",
 237                                end, areq->nbytes);
 238                        err = -EINVAL;
 239                        goto release_ss;
 240                }
 241        } else {
 242                /* Since we have the flag final, we can go up to modulo 4 */
 243                end = ((areq->nbytes + op->len) / 4) * 4 - op->len;
 244        }
 245
 246        /* TODO if SGlen % 4 and !op->len then DMA */
 247        i = 1;
 248        while (in_sg && i == 1) {
 249                if (in_sg->length % 4)
 250                        i = 0;
 251                in_sg = sg_next(in_sg);
 252        }
 253        if (i == 1 && !op->len && areq->nbytes)
 254                dev_dbg(ss->dev, "We can DMA\n");
 255
 256        i = 0;
 257        sg_miter_start(&mi, areq->src, sg_nents(areq->src),
 258                       SG_MITER_FROM_SG | SG_MITER_ATOMIC);
 259        sg_miter_next(&mi);
 260        in_i = 0;
 261
 262        do {
 263                /*
 264                 * we need to linearize in two case:
 265                 * - the buffer is already used
 266                 * - the SG does not have enough byte remaining ( < 4)
 267                 */
 268                if (op->len || (mi.length - in_i) < 4) {
 269                        /*
 270                         * if we have entered here we have two reason to stop
 271                         * - the buffer is full
 272                         * - reach the end
 273                         */
 274                        while (op->len < 64 && i < end) {
 275                                /* how many bytes we can read from current SG */
 276                                in_r = min3(mi.length - in_i, end - i,
 277                                            64 - op->len);
 278                                memcpy(op->buf + op->len, mi.addr + in_i, in_r);
 279                                op->len += in_r;
 280                                i += in_r;
 281                                in_i += in_r;
 282                                if (in_i == mi.length) {
 283                                        sg_miter_next(&mi);
 284                                        in_i = 0;
 285                                }
 286                        }
 287                        if (op->len > 3 && !(op->len % 4)) {
 288                                /* write buf to the device */
 289                                writesl(ss->base + SS_RXFIFO, op->buf,
 290                                        op->len / 4);
 291                                op->byte_count += op->len;
 292                                op->len = 0;
 293                        }
 294                }
 295                if (mi.length - in_i > 3 && i < end) {
 296                        /* how many bytes we can read from current SG */
 297                        in_r = min3(mi.length - in_i, areq->nbytes - i,
 298                                    ((mi.length - in_i) / 4) * 4);
 299                        /* how many bytes we can write in the device*/
 300                        todo = min3((u32)(end - i) / 4, rx_cnt, (u32)in_r / 4);
 301                        writesl(ss->base + SS_RXFIFO, mi.addr + in_i, todo);
 302                        op->byte_count += todo * 4;
 303                        i += todo * 4;
 304                        in_i += todo * 4;
 305                        rx_cnt -= todo;
 306                        if (!rx_cnt) {
 307                                spaces = readl(ss->base + SS_FCSR);
 308                                rx_cnt = SS_RXFIFO_SPACES(spaces);
 309                        }
 310                        if (in_i == mi.length) {
 311                                sg_miter_next(&mi);
 312                                in_i = 0;
 313                        }
 314                }
 315        } while (i < end);
 316
 317        /*
 318         * Now we have written to the device all that we can,
 319         * store the remaining bytes in op->buf
 320         */
 321        if ((areq->nbytes - i) < 64) {
 322                while (i < areq->nbytes && in_i < mi.length && op->len < 64) {
 323                        /* how many bytes we can read from current SG */
 324                        in_r = min3(mi.length - in_i, areq->nbytes - i,
 325                                    64 - op->len);
 326                        memcpy(op->buf + op->len, mi.addr + in_i, in_r);
 327                        op->len += in_r;
 328                        i += in_r;
 329                        in_i += in_r;
 330                        if (in_i == mi.length) {
 331                                sg_miter_next(&mi);
 332                                in_i = 0;
 333                        }
 334                }
 335        }
 336
 337        sg_miter_stop(&mi);
 338
 339        /*
 340         * End of data process
 341         * Now if we have the flag final go to finalize part
 342         * If not, store the partial hash
 343         */
 344        if (op->flags & SS_HASH_FINAL)
 345                goto hash_final;
 346
 347        writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);
 348        i = 0;
 349        do {
 350                v = readl(ss->base + SS_CTL);
 351                i++;
 352        } while (i < SS_TIMEOUT && (v & SS_DATA_END));
 353        if (unlikely(i >= SS_TIMEOUT)) {
 354                dev_err_ratelimited(ss->dev,
 355                                    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
 356                                    i, SS_TIMEOUT, v, areq->nbytes);
 357                err = -EIO;
 358                goto release_ss;
 359        }
 360
 361        /*
 362         * The datasheet isn't very clear about when to retrieve the digest. The
 363         * bit SS_DATA_END is cleared when the engine has processed the data and
 364         * when the digest is computed *but* it doesn't mean the digest is
 365         * available in the digest registers. Hence the delay to be sure we can
 366         * read it.
 367         */
 368        ndelay(1);
 369
 370        for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
 371                op->hash[i] = readl(ss->base + SS_MD0 + i * 4);
 372
 373        goto release_ss;
 374
 375/*
 376 * hash_final: finalize hashing operation
 377 *
 378 * If we have some remaining bytes, we write them.
 379 * Then ask the SS for finalizing the hashing operation
 380 *
 381 * I do not check RX FIFO size in this function since the size is 32
 382 * after each enabling and this function neither write more than 32 words.
 383 * If we come from the update part, we cannot have more than
 384 * 3 remaining bytes to write and SS is fast enough to not care about it.
 385 */
 386
 387hash_final:
 388
 389        /* write the remaining words of the wait buffer */
 390        if (op->len) {
 391                nwait = op->len / 4;
 392                if (nwait) {
 393                        writesl(ss->base + SS_RXFIFO, op->buf, nwait);
 394                        op->byte_count += 4 * nwait;
 395                }
 396
 397                nbw = op->len - 4 * nwait;
 398                if (nbw) {
 399                        wb = *(u32 *)(op->buf + nwait * 4);
 400                        wb &= GENMASK((nbw * 8) - 1, 0);
 401
 402                        op->byte_count += nbw;
 403                }
 404        }
 405
 406        /* write the remaining bytes of the nbw buffer */
 407        wb |= ((1 << 7) << (nbw * 8));
 408        bf[j++] = wb;
 409
 410        /*
 411         * number of space to pad to obtain 64o minus 8(size) minus 4 (final 1)
 412         * I take the operations from other MD5/SHA1 implementations
 413         */
 414
 415        /* last block size */
 416        fill = 64 - (op->byte_count % 64);
 417        min_fill = 2 * sizeof(u32) + (nbw ? 0 : sizeof(u32));
 418
 419        /* if we can't fill all data, jump to the next 64 block */
 420        if (fill < min_fill)
 421                fill += 64;
 422
 423        j += (fill - min_fill) / sizeof(u32);
 424
 425        /* write the length of data */
 426        if (op->mode == SS_OP_SHA1) {
 427                __be64 bits = cpu_to_be64(op->byte_count << 3);
 428                bf[j++] = lower_32_bits(bits);
 429                bf[j++] = upper_32_bits(bits);
 430        } else {
 431                __le64 bits = op->byte_count << 3;
 432                bf[j++] = lower_32_bits(bits);
 433                bf[j++] = upper_32_bits(bits);
 434        }
 435        writesl(ss->base + SS_RXFIFO, bf, j);
 436
 437        /* Tell the SS to stop the hashing */
 438        writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);
 439
 440        /*
 441         * Wait for SS to finish the hash.
 442         * The timeout could happen only in case of bad overclocking
 443         * or driver bug.
 444         */
 445        i = 0;
 446        do {
 447                v = readl(ss->base + SS_CTL);
 448                i++;
 449        } while (i < SS_TIMEOUT && (v & SS_DATA_END));
 450        if (unlikely(i >= SS_TIMEOUT)) {
 451                dev_err_ratelimited(ss->dev,
 452                                    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
 453                                    i, SS_TIMEOUT, v, areq->nbytes);
 454                err = -EIO;
 455                goto release_ss;
 456        }
 457
 458        /*
 459         * The datasheet isn't very clear about when to retrieve the digest. The
 460         * bit SS_DATA_END is cleared when the engine has processed the data and
 461         * when the digest is computed *but* it doesn't mean the digest is
 462         * available in the digest registers. Hence the delay to be sure we can
 463         * read it.
 464         */
 465        ndelay(1);
 466
 467        /* Get the hash from the device */
 468        if (op->mode == SS_OP_SHA1) {
 469                for (i = 0; i < 5; i++) {
 470                        v = cpu_to_be32(readl(ss->base + SS_MD0 + i * 4));
 471                        memcpy(areq->result + i * 4, &v, 4);
 472                }
 473        } else {
 474                for (i = 0; i < 4; i++) {
 475                        v = readl(ss->base + SS_MD0 + i * 4);
 476                        memcpy(areq->result + i * 4, &v, 4);
 477                }
 478        }
 479
 480release_ss:
 481        writel(0, ss->base + SS_CTL);
 482        spin_unlock_bh(&ss->slock);
 483        return err;
 484}
 485
 486int sun4i_hash_final(struct ahash_request *areq)
 487{
 488        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 489
 490        op->flags = SS_HASH_FINAL;
 491        return sun4i_hash(areq);
 492}
 493
 494int sun4i_hash_update(struct ahash_request *areq)
 495{
 496        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 497
 498        op->flags = SS_HASH_UPDATE;
 499        return sun4i_hash(areq);
 500}
 501
 502/* sun4i_hash_finup: finalize hashing operation after an update */
 503int sun4i_hash_finup(struct ahash_request *areq)
 504{
 505        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 506
 507        op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
 508        return sun4i_hash(areq);
 509}
 510
 511/* combo of init/update/final functions */
 512int sun4i_hash_digest(struct ahash_request *areq)
 513{
 514        int err;
 515        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 516
 517        err = sun4i_hash_init(areq);
 518        if (err)
 519                return err;
 520
 521        op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
 522        return sun4i_hash(areq);
 523}
 524