linux/drivers/crypto/sunxi-ss/sun4i-ss-hash.c
<<
>>
Prefs
   1/*
   2 * sun4i-ss-hash.c - hardware cryptographic accelerator for Allwinner A20 SoC
   3 *
   4 * Copyright (C) 2013-2015 Corentin LABBE <clabbe.montjoie@gmail.com>
   5 *
   6 * This file adds support for MD5 and SHA1.
   7 *
   8 * You can find the datasheet in Documentation/arm/sunxi/README
   9 *
  10 * This program is free software; you can redistribute it and/or modify
  11 * it under the terms of the GNU General Public License as published by
  12 * the Free Software Foundation; either version 2 of the License, or
  13 * (at your option) any later version.
  14 */
  15#include "sun4i-ss.h"
  16#include <linux/scatterlist.h>
  17
  18/* This is a totally arbitrary value */
  19#define SS_TIMEOUT 100
  20
  21int sun4i_hash_crainit(struct crypto_tfm *tfm)
  22{
  23        struct sun4i_tfm_ctx *op = crypto_tfm_ctx(tfm);
  24        struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
  25        struct sun4i_ss_alg_template *algt;
  26
  27        memset(op, 0, sizeof(struct sun4i_tfm_ctx));
  28
  29        algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
  30        op->ss = algt->ss;
  31
  32        crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
  33                                 sizeof(struct sun4i_req_ctx));
  34        return 0;
  35}
  36
  37/* sun4i_hash_init: initialize request context */
  38int sun4i_hash_init(struct ahash_request *areq)
  39{
  40        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  41        struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
  42        struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
  43        struct sun4i_ss_alg_template *algt;
  44
  45        memset(op, 0, sizeof(struct sun4i_req_ctx));
  46
  47        algt = container_of(alg, struct sun4i_ss_alg_template, alg.hash);
  48        op->mode = algt->mode;
  49
  50        return 0;
  51}
  52
  53int sun4i_hash_export_md5(struct ahash_request *areq, void *out)
  54{
  55        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  56        struct md5_state *octx = out;
  57        int i;
  58
  59        octx->byte_count = op->byte_count + op->len;
  60
  61        memcpy(octx->block, op->buf, op->len);
  62
  63        if (op->byte_count > 0) {
  64                for (i = 0; i < 4; i++)
  65                        octx->hash[i] = op->hash[i];
  66        } else {
  67                octx->hash[0] = SHA1_H0;
  68                octx->hash[1] = SHA1_H1;
  69                octx->hash[2] = SHA1_H2;
  70                octx->hash[3] = SHA1_H3;
  71        }
  72
  73        return 0;
  74}
  75
  76int sun4i_hash_import_md5(struct ahash_request *areq, const void *in)
  77{
  78        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  79        const struct md5_state *ictx = in;
  80        int i;
  81
  82        sun4i_hash_init(areq);
  83
  84        op->byte_count = ictx->byte_count & ~0x3F;
  85        op->len = ictx->byte_count & 0x3F;
  86
  87        memcpy(op->buf, ictx->block, op->len);
  88
  89        for (i = 0; i < 4; i++)
  90                op->hash[i] = ictx->hash[i];
  91
  92        return 0;
  93}
  94
  95int sun4i_hash_export_sha1(struct ahash_request *areq, void *out)
  96{
  97        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
  98        struct sha1_state *octx = out;
  99        int i;
 100
 101        octx->count = op->byte_count + op->len;
 102
 103        memcpy(octx->buffer, op->buf, op->len);
 104
 105        if (op->byte_count > 0) {
 106                for (i = 0; i < 5; i++)
 107                        octx->state[i] = op->hash[i];
 108        } else {
 109                octx->state[0] = SHA1_H0;
 110                octx->state[1] = SHA1_H1;
 111                octx->state[2] = SHA1_H2;
 112                octx->state[3] = SHA1_H3;
 113                octx->state[4] = SHA1_H4;
 114        }
 115
 116        return 0;
 117}
 118
 119int sun4i_hash_import_sha1(struct ahash_request *areq, const void *in)
 120{
 121        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 122        const struct sha1_state *ictx = in;
 123        int i;
 124
 125        sun4i_hash_init(areq);
 126
 127        op->byte_count = ictx->count & ~0x3F;
 128        op->len = ictx->count & 0x3F;
 129
 130        memcpy(op->buf, ictx->buffer, op->len);
 131
 132        for (i = 0; i < 5; i++)
 133                op->hash[i] = ictx->state[i];
 134
 135        return 0;
 136}
 137
 138#define SS_HASH_UPDATE 1
 139#define SS_HASH_FINAL 2
 140
 141/*
 142 * sun4i_hash_update: update hash engine
 143 *
 144 * Could be used for both SHA1 and MD5
 145 * Write data by step of 32bits and put then in the SS.
 146 *
 147 * Since we cannot leave partial data and hash state in the engine,
 148 * we need to get the hash state at the end of this function.
 149 * We can get the hash state every 64 bytes
 150 *
 151 * So the first work is to get the number of bytes to write to SS modulo 64
 152 * The extra bytes will go to a temporary buffer op->buf storing op->len bytes
 153 *
 154 * So at the begin of update()
 155 * if op->len + areq->nbytes < 64
 156 * => all data will be written to wait buffer (op->buf) and end=0
 157 * if not, write all data from op->buf to the device and position end to
 158 * complete to 64bytes
 159 *
 160 * example 1:
 161 * update1 60o => op->len=60
 162 * update2 60o => need one more word to have 64 bytes
 163 * end=4
 164 * so write all data from op->buf and one word of SGs
 165 * write remaining data in op->buf
 166 * final state op->len=56
 167 */
 168static int sun4i_hash(struct ahash_request *areq)
 169{
 170        u32 v, ivmode = 0;
 171        unsigned int i = 0;
 172        /*
 173         * i is the total bytes read from SGs, to be compared to areq->nbytes
 174         * i is important because we cannot rely on SG length since the sum of
 175         * SG->length could be greater than areq->nbytes
 176         */
 177
 178        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 179        struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 180        struct sun4i_tfm_ctx *tfmctx = crypto_ahash_ctx(tfm);
 181        struct sun4i_ss_ctx *ss = tfmctx->ss;
 182        unsigned int in_i = 0; /* advancement in the current SG */
 183        unsigned int end;
 184        /*
 185         * end is the position when we need to stop writing to the device,
 186         * to be compared to i
 187         */
 188        int in_r, err = 0;
 189        unsigned int todo;
 190        u32 spaces, rx_cnt = SS_RX_DEFAULT;
 191        size_t copied = 0;
 192        struct sg_mapping_iter mi;
 193        unsigned int j = 0;
 194        int zeros;
 195        unsigned int index, padlen;
 196        __be64 bits;
 197        u32 bf[32];
 198        u32 wb = 0;
 199        unsigned int nwait, nbw = 0;
 200        struct scatterlist *in_sg = areq->src;
 201
 202        dev_dbg(ss->dev, "%s %s bc=%llu len=%u mode=%x wl=%u h0=%0x",
 203                __func__, crypto_tfm_alg_name(areq->base.tfm),
 204                op->byte_count, areq->nbytes, op->mode,
 205                op->len, op->hash[0]);
 206
 207        if (unlikely(areq->nbytes == 0) && (op->flags & SS_HASH_FINAL) == 0)
 208                return 0;
 209
 210        /* protect against overflow */
 211        if (unlikely(areq->nbytes > UINT_MAX - op->len)) {
 212                dev_err(ss->dev, "Cannot process too large request\n");
 213                return -EINVAL;
 214        }
 215
 216        if (op->len + areq->nbytes < 64 && (op->flags & SS_HASH_FINAL) == 0) {
 217                /* linearize data to op->buf */
 218                copied = sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
 219                                            op->buf + op->len, areq->nbytes, 0);
 220                op->len += copied;
 221                return 0;
 222        }
 223
 224        spin_lock_bh(&ss->slock);
 225
 226        /*
 227         * if some data have been processed before,
 228         * we need to restore the partial hash state
 229         */
 230        if (op->byte_count > 0) {
 231                ivmode = SS_IV_ARBITRARY;
 232                for (i = 0; i < 5; i++)
 233                        writel(op->hash[i], ss->base + SS_IV0 + i * 4);
 234        }
 235        /* Enable the device */
 236        writel(op->mode | SS_ENABLED | ivmode, ss->base + SS_CTL);
 237
 238        if ((op->flags & SS_HASH_UPDATE) == 0)
 239                goto hash_final;
 240
 241        /* start of handling data */
 242        if ((op->flags & SS_HASH_FINAL) == 0) {
 243                end = ((areq->nbytes + op->len) / 64) * 64 - op->len;
 244
 245                if (end > areq->nbytes || areq->nbytes - end > 63) {
 246                        dev_err(ss->dev, "ERROR: Bound error %u %u\n",
 247                                end, areq->nbytes);
 248                        err = -EINVAL;
 249                        goto release_ss;
 250                }
 251        } else {
 252                /* Since we have the flag final, we can go up to modulo 4 */
 253                end = ((areq->nbytes + op->len) / 4) * 4 - op->len;
 254        }
 255
 256        /* TODO if SGlen % 4 and op->len == 0 then DMA */
 257        i = 1;
 258        while (in_sg && i == 1) {
 259                if ((in_sg->length % 4) != 0)
 260                        i = 0;
 261                in_sg = sg_next(in_sg);
 262        }
 263        if (i == 1 && op->len == 0)
 264                dev_dbg(ss->dev, "We can DMA\n");
 265
 266        i = 0;
 267        sg_miter_start(&mi, areq->src, sg_nents(areq->src),
 268                       SG_MITER_FROM_SG | SG_MITER_ATOMIC);
 269        sg_miter_next(&mi);
 270        in_i = 0;
 271
 272        do {
 273                /*
 274                 * we need to linearize in two case:
 275                 * - the buffer is already used
 276                 * - the SG does not have enough byte remaining ( < 4)
 277                 */
 278                if (op->len > 0 || (mi.length - in_i) < 4) {
 279                        /*
 280                         * if we have entered here we have two reason to stop
 281                         * - the buffer is full
 282                         * - reach the end
 283                         */
 284                        while (op->len < 64 && i < end) {
 285                                /* how many bytes we can read from current SG */
 286                                in_r = min3(mi.length - in_i, end - i,
 287                                            64 - op->len);
 288                                memcpy(op->buf + op->len, mi.addr + in_i, in_r);
 289                                op->len += in_r;
 290                                i += in_r;
 291                                in_i += in_r;
 292                                if (in_i == mi.length) {
 293                                        sg_miter_next(&mi);
 294                                        in_i = 0;
 295                                }
 296                        }
 297                        if (op->len > 3 && (op->len % 4) == 0) {
 298                                /* write buf to the device */
 299                                writesl(ss->base + SS_RXFIFO, op->buf,
 300                                        op->len / 4);
 301                                op->byte_count += op->len;
 302                                op->len = 0;
 303                        }
 304                }
 305                if (mi.length - in_i > 3 && i < end) {
 306                        /* how many bytes we can read from current SG */
 307                        in_r = min3(mi.length - in_i, areq->nbytes - i,
 308                                    ((mi.length - in_i) / 4) * 4);
 309                        /* how many bytes we can write in the device*/
 310                        todo = min3((u32)(end - i) / 4, rx_cnt, (u32)in_r / 4);
 311                        writesl(ss->base + SS_RXFIFO, mi.addr + in_i, todo);
 312                        op->byte_count += todo * 4;
 313                        i += todo * 4;
 314                        in_i += todo * 4;
 315                        rx_cnt -= todo;
 316                        if (rx_cnt == 0) {
 317                                spaces = readl(ss->base + SS_FCSR);
 318                                rx_cnt = SS_RXFIFO_SPACES(spaces);
 319                        }
 320                        if (in_i == mi.length) {
 321                                sg_miter_next(&mi);
 322                                in_i = 0;
 323                        }
 324                }
 325        } while (i < end);
 326
 327        /*
 328         * Now we have written to the device all that we can,
 329         * store the remaining bytes in op->buf
 330         */
 331        if ((areq->nbytes - i) < 64) {
 332                while (i < areq->nbytes && in_i < mi.length && op->len < 64) {
 333                        /* how many bytes we can read from current SG */
 334                        in_r = min3(mi.length - in_i, areq->nbytes - i,
 335                                    64 - op->len);
 336                        memcpy(op->buf + op->len, mi.addr + in_i, in_r);
 337                        op->len += in_r;
 338                        i += in_r;
 339                        in_i += in_r;
 340                        if (in_i == mi.length) {
 341                                sg_miter_next(&mi);
 342                                in_i = 0;
 343                        }
 344                }
 345        }
 346
 347        sg_miter_stop(&mi);
 348
 349        /*
 350         * End of data process
 351         * Now if we have the flag final go to finalize part
 352         * If not, store the partial hash
 353         */
 354        if ((op->flags & SS_HASH_FINAL) > 0)
 355                goto hash_final;
 356
 357        writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);
 358        i = 0;
 359        do {
 360                v = readl(ss->base + SS_CTL);
 361                i++;
 362        } while (i < SS_TIMEOUT && (v & SS_DATA_END) > 0);
 363        if (unlikely(i >= SS_TIMEOUT)) {
 364                dev_err_ratelimited(ss->dev,
 365                                    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
 366                                    i, SS_TIMEOUT, v, areq->nbytes);
 367                err = -EIO;
 368                goto release_ss;
 369        }
 370
 371        for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++)
 372                op->hash[i] = readl(ss->base + SS_MD0 + i * 4);
 373
 374        goto release_ss;
 375
 376/*
 377 * hash_final: finalize hashing operation
 378 *
 379 * If we have some remaining bytes, we write them.
 380 * Then ask the SS for finalizing the hashing operation
 381 *
 382 * I do not check RX FIFO size in this function since the size is 32
 383 * after each enabling and this function neither write more than 32 words.
 384 * If we come from the update part, we cannot have more than
 385 * 3 remaining bytes to write and SS is fast enough to not care about it.
 386 */
 387
 388hash_final:
 389
 390        /* write the remaining words of the wait buffer */
 391        if (op->len > 0) {
 392                nwait = op->len / 4;
 393                if (nwait > 0) {
 394                        writesl(ss->base + SS_RXFIFO, op->buf, nwait);
 395                        op->byte_count += 4 * nwait;
 396                }
 397                nbw = op->len - 4 * nwait;
 398                wb = *(u32 *)(op->buf + nwait * 4);
 399                wb &= (0xFFFFFFFF >> (4 - nbw) * 8);
 400        }
 401
 402        /* write the remaining bytes of the nbw buffer */
 403        if (nbw > 0) {
 404                wb |= ((1 << 7) << (nbw * 8));
 405                bf[j++] = wb;
 406        } else {
 407                bf[j++] = 1 << 7;
 408        }
 409
 410        /*
 411         * number of space to pad to obtain 64o minus 8(size) minus 4 (final 1)
 412         * I take the operations from other MD5/SHA1 implementations
 413         */
 414
 415        /* we have already send 4 more byte of which nbw data */
 416        if (op->mode == SS_OP_MD5) {
 417                index = (op->byte_count + 4) & 0x3f;
 418                op->byte_count += nbw;
 419                if (index > 56)
 420                        zeros = (120 - index) / 4;
 421                else
 422                        zeros = (56 - index) / 4;
 423        } else {
 424                op->byte_count += nbw;
 425                index = op->byte_count & 0x3f;
 426                padlen = (index < 56) ? (56 - index) : ((64 + 56) - index);
 427                zeros = (padlen - 1) / 4;
 428        }
 429
 430        memset(bf + j, 0, 4 * zeros);
 431        j += zeros;
 432
 433        /* write the length of data */
 434        if (op->mode == SS_OP_SHA1) {
 435                bits = cpu_to_be64(op->byte_count << 3);
 436                bf[j++] = bits & 0xffffffff;
 437                bf[j++] = (bits >> 32) & 0xffffffff;
 438        } else {
 439                bf[j++] = (op->byte_count << 3) & 0xffffffff;
 440                bf[j++] = (op->byte_count >> 29) & 0xffffffff;
 441        }
 442        writesl(ss->base + SS_RXFIFO, bf, j);
 443
 444        /* Tell the SS to stop the hashing */
 445        writel(op->mode | SS_ENABLED | SS_DATA_END, ss->base + SS_CTL);
 446
 447        /*
 448         * Wait for SS to finish the hash.
 449         * The timeout could happen only in case of bad overclocking
 450         * or driver bug.
 451         */
 452        i = 0;
 453        do {
 454                v = readl(ss->base + SS_CTL);
 455                i++;
 456        } while (i < SS_TIMEOUT && (v & SS_DATA_END) > 0);
 457        if (unlikely(i >= SS_TIMEOUT)) {
 458                dev_err_ratelimited(ss->dev,
 459                                    "ERROR: hash end timeout %d>%d ctl=%x len=%u\n",
 460                                    i, SS_TIMEOUT, v, areq->nbytes);
 461                err = -EIO;
 462                goto release_ss;
 463        }
 464
 465        /* Get the hash from the device */
 466        if (op->mode == SS_OP_SHA1) {
 467                for (i = 0; i < 5; i++) {
 468                        v = cpu_to_be32(readl(ss->base + SS_MD0 + i * 4));
 469                        memcpy(areq->result + i * 4, &v, 4);
 470                }
 471        } else {
 472                for (i = 0; i < 4; i++) {
 473                        v = readl(ss->base + SS_MD0 + i * 4);
 474                        memcpy(areq->result + i * 4, &v, 4);
 475                }
 476        }
 477
 478release_ss:
 479        writel(0, ss->base + SS_CTL);
 480        spin_unlock_bh(&ss->slock);
 481        return err;
 482}
 483
 484int sun4i_hash_final(struct ahash_request *areq)
 485{
 486        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 487
 488        op->flags = SS_HASH_FINAL;
 489        return sun4i_hash(areq);
 490}
 491
 492int sun4i_hash_update(struct ahash_request *areq)
 493{
 494        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 495
 496        op->flags = SS_HASH_UPDATE;
 497        return sun4i_hash(areq);
 498}
 499
 500/* sun4i_hash_finup: finalize hashing operation after an update */
 501int sun4i_hash_finup(struct ahash_request *areq)
 502{
 503        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 504
 505        op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
 506        return sun4i_hash(areq);
 507}
 508
 509/* combo of init/update/final functions */
 510int sun4i_hash_digest(struct ahash_request *areq)
 511{
 512        int err;
 513        struct sun4i_req_ctx *op = ahash_request_ctx(areq);
 514
 515        err = sun4i_hash_init(areq);
 516        if (err != 0)
 517                return err;
 518
 519        op->flags = SS_HASH_UPDATE | SS_HASH_FINAL;
 520        return sun4i_hash(areq);
 521}
 522