LXR linux/crypto/gf128mul.c

   1/* gf128mul.c - GF(2^128) multiplication functions
   2 *
   3 * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
   4 * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
   5 *
   6 * Based on Dr Brian Gladman's (GPL'd) work published at
   7 * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
   8 * See the original copyright notice below.
   9 *
  10 * This program is free software; you can redistribute it and/or modify it
  11 * under the terms of the GNU General Public License as published by the Free
  12 * Software Foundation; either version 2 of the License, or (at your option)
  13 * any later version.
  14 */
  15
  16/*
  17 ---------------------------------------------------------------------------
  18 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
  19
  20 LICENSE TERMS
  21
  22 The free distribution and use of this software in both source and binary
  23 form is allowed (with or without changes) provided that:
  24
  25   1. distributions of this source code include the above copyright
  26      notice, this list of conditions and the following disclaimer;
  27
  28   2. distributions in binary form include the above copyright
  29      notice, this list of conditions and the following disclaimer
  30      in the documentation and/or other associated materials;
  31
  32   3. the copyright holder's name is not used to endorse products
  33      built using this software without specific written permission.
  34
  35 ALTERNATIVELY, provided that this notice is retained in full, this product
  36 may be distributed under the terms of the GNU General Public License (GPL),
  37 in which case the provisions of the GPL apply INSTEAD OF those given above.
  38
  39 DISCLAIMER
  40
  41 This software is provided 'as is' with no explicit or implied warranties
  42 in respect of its properties, including, but not limited to, correctness
  43 and/or fitness for purpose.
  44 ---------------------------------------------------------------------------
  45 Issue 31/01/2006
  46
  47 This file provides fast multiplication in GF(128) as required by several
  48 cryptographic authentication modes
  49*/
  50
  51#include <crypto/gf128mul.h>
  52#include <linux/kernel.h>
  53#include <linux/module.h>
  54#include <linux/slab.h>
  55
  56#define gf128mul_dat(q) { \
  57        q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
  58        q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
  59        q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
  60        q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
  61        q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
  62        q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
  63        q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
  64        q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
  65        q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
  66        q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
  67        q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
  68        q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
  69        q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
  70        q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
  71        q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
  72        q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
  73        q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
  74        q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
  75        q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
  76        q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
  77        q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
  78        q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
  79        q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
  80        q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
  81        q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
  82        q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
  83        q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
  84        q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
  85        q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
  86        q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
  87        q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
  88        q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
  89}
  90
  91/*      Given the value i in 0..255 as the byte overflow when a field element
  92    in GHASH is multiplied by x^8, this function will return the values that
  93    are generated in the lo 16-bit word of the field value by applying the
  94    modular polynomial. The values lo_byte and hi_byte are returned via the
  95    macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into
  96    memory as required by a suitable definition of this macro operating on
  97    the table above
  98*/
  99
 100#define xx(p, q)        0x##p##q
 101
 102#define xda_bbe(i) ( \
 103        (i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \
 104        (i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \
 105        (i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \
 106        (i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \
 107)
 108
 109#define xda_lle(i) ( \
 110        (i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \
 111        (i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \
 112        (i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \
 113        (i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \
 114)
 115
 116static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle);
 117static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe);
 118
 119/* These functions multiply a field element by x, by x^4 and by x^8
 120 * in the polynomial field representation. It uses 32-bit word operations
 121 * to gain speed but compensates for machine endianess and hence works
 122 * correctly on both styles of machine.
 123 */
 124
 125static void gf128mul_x_lle(be128 *r, const be128 *x)
 126{
 127        u64 a = be64_to_cpu(x->a);
 128        u64 b = be64_to_cpu(x->b);
 129        u64 _tt = gf128mul_table_lle[(b << 7) & 0xff];
 130
 131        r->b = cpu_to_be64((b >> 1) | (a << 63));
 132        r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
 133}
 134
 135static void gf128mul_x_bbe(be128 *r, const be128 *x)
 136{
 137        u64 a = be64_to_cpu(x->a);
 138        u64 b = be64_to_cpu(x->b);
 139        u64 _tt = gf128mul_table_bbe[a >> 63];
 140
 141        r->a = cpu_to_be64((a << 1) | (b >> 63));
 142        r->b = cpu_to_be64((b << 1) ^ _tt);
 143}
 144
 145void gf128mul_x_ble(be128 *r, const be128 *x)
 146{
 147        u64 a = le64_to_cpu(x->a);
 148        u64 b = le64_to_cpu(x->b);
 149        u64 _tt = gf128mul_table_bbe[b >> 63];
 150
 151        r->a = cpu_to_le64((a << 1) ^ _tt);
 152        r->b = cpu_to_le64((b << 1) | (a >> 63));
 153}
 154EXPORT_SYMBOL(gf128mul_x_ble);
 155
 156static void gf128mul_x8_lle(be128 *x)
 157{
 158        u64 a = be64_to_cpu(x->a);
 159        u64 b = be64_to_cpu(x->b);
 160        u64 _tt = gf128mul_table_lle[b & 0xff];
 161
 162        x->b = cpu_to_be64((b >> 8) | (a << 56));
 163        x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
 164}
 165
 166static void gf128mul_x8_bbe(be128 *x)
 167{
 168        u64 a = be64_to_cpu(x->a);
 169        u64 b = be64_to_cpu(x->b);
 170        u64 _tt = gf128mul_table_bbe[a >> 56];
 171
 172        x->a = cpu_to_be64((a << 8) | (b >> 56));
 173        x->b = cpu_to_be64((b << 8) ^ _tt);
 174}
 175
 176void gf128mul_lle(be128 *r, const be128 *b)
 177{
 178        be128 p[8];
 179        int i;
 180
 181        p[0] = *r;
 182        for (i = 0; i < 7; ++i)
 183                gf128mul_x_lle(&p[i + 1], &p[i]);
 184
 185        memset(r, 0, sizeof(*r));
 186        for (i = 0;;) {
 187                u8 ch = ((u8 *)b)[15 - i];
 188
 189                if (ch & 0x80)
 190                        be128_xor(r, r, &p[0]);
 191                if (ch & 0x40)
 192                        be128_xor(r, r, &p[1]);
 193                if (ch & 0x20)
 194                        be128_xor(r, r, &p[2]);
 195                if (ch & 0x10)
 196                        be128_xor(r, r, &p[3]);
 197                if (ch & 0x08)
 198                        be128_xor(r, r, &p[4]);
 199                if (ch & 0x04)
 200                        be128_xor(r, r, &p[5]);
 201                if (ch & 0x02)
 202                        be128_xor(r, r, &p[6]);
 203                if (ch & 0x01)
 204                        be128_xor(r, r, &p[7]);
 205
 206                if (++i >= 16)
 207                        break;
 208
 209                gf128mul_x8_lle(r);
 210        }
 211}
 212EXPORT_SYMBOL(gf128mul_lle);
 213
 214void gf128mul_bbe(be128 *r, const be128 *b)
 215{
 216        be128 p[8];
 217        int i;
 218
 219        p[0] = *r;
 220        for (i = 0; i < 7; ++i)
 221                gf128mul_x_bbe(&p[i + 1], &p[i]);
 222
 223        memset(r, 0, sizeof(*r));
 224        for (i = 0;;) {
 225                u8 ch = ((u8 *)b)[i];
 226
 227                if (ch & 0x80)
 228                        be128_xor(r, r, &p[7]);
 229                if (ch & 0x40)
 230                        be128_xor(r, r, &p[6]);
 231                if (ch & 0x20)
 232                        be128_xor(r, r, &p[5]);
 233                if (ch & 0x10)
 234                        be128_xor(r, r, &p[4]);
 235                if (ch & 0x08)
 236                        be128_xor(r, r, &p[3]);
 237                if (ch & 0x04)
 238                        be128_xor(r, r, &p[2]);
 239                if (ch & 0x02)
 240                        be128_xor(r, r, &p[1]);
 241                if (ch & 0x01)
 242                        be128_xor(r, r, &p[0]);
 243
 244                if (++i >= 16)
 245                        break;
 246
 247                gf128mul_x8_bbe(r);
 248        }
 249}
 250EXPORT_SYMBOL(gf128mul_bbe);
 251
 252/*      This version uses 64k bytes of table space.
 253    A 16 byte buffer has to be multiplied by a 16 byte key
 254    value in GF(128).  If we consider a GF(128) value in
 255    the buffer's lowest byte, we can construct a table of
 256    the 256 16 byte values that result from the 256 values
 257    of this byte.  This requires 4096 bytes. But we also
 258    need tables for each of the 16 higher bytes in the
 259    buffer as well, which makes 64 kbytes in total.
 260*/
 261/* additional explanation
 262 * t[0][BYTE] contains g*BYTE
 263 * t[1][BYTE] contains g*x^8*BYTE
 264 *  ..
 265 * t[15][BYTE] contains g*x^120*BYTE */
 266struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g)
 267{
 268        struct gf128mul_64k *t;
 269        int i, j, k;
 270
 271        t = kzalloc(sizeof(*t), GFP_KERNEL);
 272        if (!t)
 273                goto out;
 274
 275        for (i = 0; i < 16; i++) {
 276                t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
 277                if (!t->t[i]) {
 278                        gf128mul_free_64k(t);
 279                        t = NULL;
 280                        goto out;
 281                }
 282        }
 283
 284        t->t[0]->t[128] = *g;
 285        for (j = 64; j > 0; j >>= 1)
 286                gf128mul_x_lle(&t->t[0]->t[j], &t->t[0]->t[j + j]);
 287
 288        for (i = 0;;) {
 289                for (j = 2; j < 256; j += j)
 290                        for (k = 1; k < j; ++k)
 291                                be128_xor(&t->t[i]->t[j + k],
 292                                          &t->t[i]->t[j], &t->t[i]->t[k]);
 293
 294                if (++i >= 16)
 295                        break;
 296
 297                for (j = 128; j > 0; j >>= 1) {
 298                        t->t[i]->t[j] = t->t[i - 1]->t[j];
 299                        gf128mul_x8_lle(&t->t[i]->t[j]);
 300                }
 301        }
 302
 303out:
 304        return t;
 305}
 306EXPORT_SYMBOL(gf128mul_init_64k_lle);
 307
 308struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
 309{
 310        struct gf128mul_64k *t;
 311        int i, j, k;
 312
 313        t = kzalloc(sizeof(*t), GFP_KERNEL);
 314        if (!t)
 315                goto out;
 316
 317        for (i = 0; i < 16; i++) {
 318                t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
 319                if (!t->t[i]) {
 320                        gf128mul_free_64k(t);
 321                        t = NULL;
 322                        goto out;
 323                }
 324        }
 325
 326        t->t[0]->t[1] = *g;
 327        for (j = 1; j <= 64; j <<= 1)
 328                gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
 329
 330        for (i = 0;;) {
 331                for (j = 2; j < 256; j += j)
 332                        for (k = 1; k < j; ++k)
 333                                be128_xor(&t->t[i]->t[j + k],
 334                                          &t->t[i]->t[j], &t->t[i]->t[k]);
 335
 336                if (++i >= 16)
 337                        break;
 338
 339                for (j = 128; j > 0; j >>= 1) {
 340                        t->t[i]->t[j] = t->t[i - 1]->t[j];
 341                        gf128mul_x8_bbe(&t->t[i]->t[j]);
 342                }
 343        }
 344
 345out:
 346        return t;
 347}
 348EXPORT_SYMBOL(gf128mul_init_64k_bbe);
 349
 350void gf128mul_free_64k(struct gf128mul_64k *t)
 351{
 352        int i;
 353
 354        for (i = 0; i < 16; i++)
 355                kfree(t->t[i]);
 356        kfree(t);
 357}
 358EXPORT_SYMBOL(gf128mul_free_64k);
 359
 360void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t)
 361{
 362        u8 *ap = (u8 *)a;
 363        be128 r[1];
 364        int i;
 365
 366        *r = t->t[0]->t[ap[0]];
 367        for (i = 1; i < 16; ++i)
 368                be128_xor(r, r, &t->t[i]->t[ap[i]]);
 369        *a = *r;
 370}
 371EXPORT_SYMBOL(gf128mul_64k_lle);
 372
 373void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t)
 374{
 375        u8 *ap = (u8 *)a;
 376        be128 r[1];
 377        int i;
 378
 379        *r = t->t[0]->t[ap[15]];
 380        for (i = 1; i < 16; ++i)
 381                be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
 382        *a = *r;
 383}
 384EXPORT_SYMBOL(gf128mul_64k_bbe);
 385
 386/*      This version uses 4k bytes of table space.
 387    A 16 byte buffer has to be multiplied by a 16 byte key
 388    value in GF(128).  If we consider a GF(128) value in a
 389    single byte, we can construct a table of the 256 16 byte
 390    values that result from the 256 values of this byte.
 391    This requires 4096 bytes. If we take the highest byte in
 392    the buffer and use this table to get the result, we then
 393    have to multiply by x^120 to get the final value. For the
 394    next highest byte the result has to be multiplied by x^112
 395    and so on. But we can do this by accumulating the result
 396    in an accumulator starting with the result for the top
 397    byte.  We repeatedly multiply the accumulator value by
 398    x^8 and then add in (i.e. xor) the 16 bytes of the next
 399    lower byte in the buffer, stopping when we reach the
 400    lowest byte. This requires a 4096 byte table.
 401*/
 402struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
 403{
 404        struct gf128mul_4k *t;
 405        int j, k;
 406
 407        t = kzalloc(sizeof(*t), GFP_KERNEL);
 408        if (!t)
 409                goto out;
 410
 411        t->t[128] = *g;
 412        for (j = 64; j > 0; j >>= 1)
 413                gf128mul_x_lle(&t->t[j], &t->t[j+j]);
 414
 415        for (j = 2; j < 256; j += j)
 416                for (k = 1; k < j; ++k)
 417                        be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
 418
 419out:
 420        return t;
 421}
 422EXPORT_SYMBOL(gf128mul_init_4k_lle);
 423
 424struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
 425{
 426        struct gf128mul_4k *t;
 427        int j, k;
 428
 429        t = kzalloc(sizeof(*t), GFP_KERNEL);
 430        if (!t)
 431                goto out;
 432
 433        t->t[1] = *g;
 434        for (j = 1; j <= 64; j <<= 1)
 435                gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
 436
 437        for (j = 2; j < 256; j += j)
 438                for (k = 1; k < j; ++k)
 439                        be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
 440
 441out:
 442        return t;
 443}
 444EXPORT_SYMBOL(gf128mul_init_4k_bbe);
 445
 446void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t)
 447{
 448        u8 *ap = (u8 *)a;
 449        be128 r[1];
 450        int i = 15;
 451
 452        *r = t->t[ap[15]];
 453        while (i--) {
 454                gf128mul_x8_lle(r);
 455                be128_xor(r, r, &t->t[ap[i]]);
 456        }
 457        *a = *r;
 458}
 459EXPORT_SYMBOL(gf128mul_4k_lle);
 460
 461void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t)
 462{
 463        u8 *ap = (u8 *)a;
 464        be128 r[1];
 465        int i = 0;
 466
 467        *r = t->t[ap[0]];
 468        while (++i < 16) {
 469                gf128mul_x8_bbe(r);
 470                be128_xor(r, r, &t->t[ap[i]]);
 471        }
 472        *a = *r;
 473}
 474EXPORT_SYMBOL(gf128mul_4k_bbe);
 475
 476MODULE_LICENSE("GPL");
 477MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");
 478