linux/crypto/gf128mul.c
<<
>>
Prefs
   1/* gf128mul.c - GF(2^128) multiplication functions
   2 *
   3 * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.
   4 * Copyright (c) 2006, Rik Snel <rsnel@cube.dyndns.org>
   5 *
   6 * Based on Dr Brian Gladman's (GPL'd) work published at
   7 * http://gladman.plushost.co.uk/oldsite/cryptography_technology/index.php
   8 * See the original copyright notice below.
   9 *
  10 * This program is free software; you can redistribute it and/or modify it
  11 * under the terms of the GNU General Public License as published by the Free
  12 * Software Foundation; either version 2 of the License, or (at your option)
  13 * any later version.
  14 */
  15
  16/*
  17 ---------------------------------------------------------------------------
  18 Copyright (c) 2003, Dr Brian Gladman, Worcester, UK.   All rights reserved.
  19
  20 LICENSE TERMS
  21
  22 The free distribution and use of this software in both source and binary
  23 form is allowed (with or without changes) provided that:
  24
  25   1. distributions of this source code include the above copyright
  26      notice, this list of conditions and the following disclaimer;
  27
  28   2. distributions in binary form include the above copyright
  29      notice, this list of conditions and the following disclaimer
  30      in the documentation and/or other associated materials;
  31
  32   3. the copyright holder's name is not used to endorse products
  33      built using this software without specific written permission.
  34
  35 ALTERNATIVELY, provided that this notice is retained in full, this product
  36 may be distributed under the terms of the GNU General Public License (GPL),
  37 in which case the provisions of the GPL apply INSTEAD OF those given above.
  38
  39 DISCLAIMER
  40
  41 This software is provided 'as is' with no explicit or implied warranties
  42 in respect of its properties, including, but not limited to, correctness
  43 and/or fitness for purpose.
  44 ---------------------------------------------------------------------------
  45 Issue 31/01/2006
  46
  47 This file provides fast multiplication in GF(2^128) as required by several
  48 cryptographic authentication modes
  49*/
  50
  51#include <crypto/gf128mul.h>
  52#include <linux/kernel.h>
  53#include <linux/module.h>
  54#include <linux/slab.h>
  55
  56#define gf128mul_dat(q) { \
  57        q(0x00), q(0x01), q(0x02), q(0x03), q(0x04), q(0x05), q(0x06), q(0x07),\
  58        q(0x08), q(0x09), q(0x0a), q(0x0b), q(0x0c), q(0x0d), q(0x0e), q(0x0f),\
  59        q(0x10), q(0x11), q(0x12), q(0x13), q(0x14), q(0x15), q(0x16), q(0x17),\
  60        q(0x18), q(0x19), q(0x1a), q(0x1b), q(0x1c), q(0x1d), q(0x1e), q(0x1f),\
  61        q(0x20), q(0x21), q(0x22), q(0x23), q(0x24), q(0x25), q(0x26), q(0x27),\
  62        q(0x28), q(0x29), q(0x2a), q(0x2b), q(0x2c), q(0x2d), q(0x2e), q(0x2f),\
  63        q(0x30), q(0x31), q(0x32), q(0x33), q(0x34), q(0x35), q(0x36), q(0x37),\
  64        q(0x38), q(0x39), q(0x3a), q(0x3b), q(0x3c), q(0x3d), q(0x3e), q(0x3f),\
  65        q(0x40), q(0x41), q(0x42), q(0x43), q(0x44), q(0x45), q(0x46), q(0x47),\
  66        q(0x48), q(0x49), q(0x4a), q(0x4b), q(0x4c), q(0x4d), q(0x4e), q(0x4f),\
  67        q(0x50), q(0x51), q(0x52), q(0x53), q(0x54), q(0x55), q(0x56), q(0x57),\
  68        q(0x58), q(0x59), q(0x5a), q(0x5b), q(0x5c), q(0x5d), q(0x5e), q(0x5f),\
  69        q(0x60), q(0x61), q(0x62), q(0x63), q(0x64), q(0x65), q(0x66), q(0x67),\
  70        q(0x68), q(0x69), q(0x6a), q(0x6b), q(0x6c), q(0x6d), q(0x6e), q(0x6f),\
  71        q(0x70), q(0x71), q(0x72), q(0x73), q(0x74), q(0x75), q(0x76), q(0x77),\
  72        q(0x78), q(0x79), q(0x7a), q(0x7b), q(0x7c), q(0x7d), q(0x7e), q(0x7f),\
  73        q(0x80), q(0x81), q(0x82), q(0x83), q(0x84), q(0x85), q(0x86), q(0x87),\
  74        q(0x88), q(0x89), q(0x8a), q(0x8b), q(0x8c), q(0x8d), q(0x8e), q(0x8f),\
  75        q(0x90), q(0x91), q(0x92), q(0x93), q(0x94), q(0x95), q(0x96), q(0x97),\
  76        q(0x98), q(0x99), q(0x9a), q(0x9b), q(0x9c), q(0x9d), q(0x9e), q(0x9f),\
  77        q(0xa0), q(0xa1), q(0xa2), q(0xa3), q(0xa4), q(0xa5), q(0xa6), q(0xa7),\
  78        q(0xa8), q(0xa9), q(0xaa), q(0xab), q(0xac), q(0xad), q(0xae), q(0xaf),\
  79        q(0xb0), q(0xb1), q(0xb2), q(0xb3), q(0xb4), q(0xb5), q(0xb6), q(0xb7),\
  80        q(0xb8), q(0xb9), q(0xba), q(0xbb), q(0xbc), q(0xbd), q(0xbe), q(0xbf),\
  81        q(0xc0), q(0xc1), q(0xc2), q(0xc3), q(0xc4), q(0xc5), q(0xc6), q(0xc7),\
  82        q(0xc8), q(0xc9), q(0xca), q(0xcb), q(0xcc), q(0xcd), q(0xce), q(0xcf),\
  83        q(0xd0), q(0xd1), q(0xd2), q(0xd3), q(0xd4), q(0xd5), q(0xd6), q(0xd7),\
  84        q(0xd8), q(0xd9), q(0xda), q(0xdb), q(0xdc), q(0xdd), q(0xde), q(0xdf),\
  85        q(0xe0), q(0xe1), q(0xe2), q(0xe3), q(0xe4), q(0xe5), q(0xe6), q(0xe7),\
  86        q(0xe8), q(0xe9), q(0xea), q(0xeb), q(0xec), q(0xed), q(0xee), q(0xef),\
  87        q(0xf0), q(0xf1), q(0xf2), q(0xf3), q(0xf4), q(0xf5), q(0xf6), q(0xf7),\
  88        q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
  89}
  90
  91/*
  92 * Given a value i in 0..255 as the byte overflow when a field element
  93 * in GF(2^128) is multiplied by x^8, the following macro returns the
  94 * 16-bit value that must be XOR-ed into the low-degree end of the
  95 * product to reduce it modulo the polynomial x^128 + x^7 + x^2 + x + 1.
  96 *
  97 * There are two versions of the macro, and hence two tables: one for
  98 * the "be" convention where the highest-order bit is the coefficient of
  99 * the highest-degree polynomial term, and one for the "le" convention
 100 * where the highest-order bit is the coefficient of the lowest-degree
 101 * polynomial term.  In both cases the values are stored in CPU byte
 102 * endianness such that the coefficients are ordered consistently across
 103 * bytes, i.e. in the "be" table bits 15..0 of the stored value
 104 * correspond to the coefficients of x^15..x^0, and in the "le" table
 105 * bits 15..0 correspond to the coefficients of x^0..x^15.
 106 *
 107 * Therefore, provided that the appropriate byte endianness conversions
 108 * are done by the multiplication functions (and these must be in place
 109 * anyway to support both little endian and big endian CPUs), the "be"
 110 * table can be used for multiplications of both "bbe" and "ble"
 111 * elements, and the "le" table can be used for multiplications of both
 112 * "lle" and "lbe" elements.
 113 */
 114
 115#define xda_be(i) ( \
 116        (i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \
 117        (i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \
 118        (i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \
 119        (i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \
 120)
 121
 122#define xda_le(i) ( \
 123        (i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \
 124        (i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \
 125        (i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \
 126        (i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \
 127)
 128
 129static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
 130static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
 131
 132/*
 133 * The following functions multiply a field element by x^8 in
 134 * the polynomial field representation.  They use 64-bit word operations
 135 * to gain speed but compensate for machine endianness and hence work
 136 * correctly on both styles of machine.
 137 */
 138
 139static void gf128mul_x8_lle(be128 *x)
 140{
 141        u64 a = be64_to_cpu(x->a);
 142        u64 b = be64_to_cpu(x->b);
 143        u64 _tt = gf128mul_table_le[b & 0xff];
 144
 145        x->b = cpu_to_be64((b >> 8) | (a << 56));
 146        x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
 147}
 148
 149static void gf128mul_x8_bbe(be128 *x)
 150{
 151        u64 a = be64_to_cpu(x->a);
 152        u64 b = be64_to_cpu(x->b);
 153        u64 _tt = gf128mul_table_be[a >> 56];
 154
 155        x->a = cpu_to_be64((a << 8) | (b >> 56));
 156        x->b = cpu_to_be64((b << 8) ^ _tt);
 157}
 158
 159void gf128mul_lle(be128 *r, const be128 *b)
 160{
 161        be128 p[8];
 162        int i;
 163
 164        p[0] = *r;
 165        for (i = 0; i < 7; ++i)
 166                gf128mul_x_lle(&p[i + 1], &p[i]);
 167
 168        memset(r, 0, sizeof(*r));
 169        for (i = 0;;) {
 170                u8 ch = ((u8 *)b)[15 - i];
 171
 172                if (ch & 0x80)
 173                        be128_xor(r, r, &p[0]);
 174                if (ch & 0x40)
 175                        be128_xor(r, r, &p[1]);
 176                if (ch & 0x20)
 177                        be128_xor(r, r, &p[2]);
 178                if (ch & 0x10)
 179                        be128_xor(r, r, &p[3]);
 180                if (ch & 0x08)
 181                        be128_xor(r, r, &p[4]);
 182                if (ch & 0x04)
 183                        be128_xor(r, r, &p[5]);
 184                if (ch & 0x02)
 185                        be128_xor(r, r, &p[6]);
 186                if (ch & 0x01)
 187                        be128_xor(r, r, &p[7]);
 188
 189                if (++i >= 16)
 190                        break;
 191
 192                gf128mul_x8_lle(r);
 193        }
 194}
 195EXPORT_SYMBOL(gf128mul_lle);
 196
 197void gf128mul_bbe(be128 *r, const be128 *b)
 198{
 199        be128 p[8];
 200        int i;
 201
 202        p[0] = *r;
 203        for (i = 0; i < 7; ++i)
 204                gf128mul_x_bbe(&p[i + 1], &p[i]);
 205
 206        memset(r, 0, sizeof(*r));
 207        for (i = 0;;) {
 208                u8 ch = ((u8 *)b)[i];
 209
 210                if (ch & 0x80)
 211                        be128_xor(r, r, &p[7]);
 212                if (ch & 0x40)
 213                        be128_xor(r, r, &p[6]);
 214                if (ch & 0x20)
 215                        be128_xor(r, r, &p[5]);
 216                if (ch & 0x10)
 217                        be128_xor(r, r, &p[4]);
 218                if (ch & 0x08)
 219                        be128_xor(r, r, &p[3]);
 220                if (ch & 0x04)
 221                        be128_xor(r, r, &p[2]);
 222                if (ch & 0x02)
 223                        be128_xor(r, r, &p[1]);
 224                if (ch & 0x01)
 225                        be128_xor(r, r, &p[0]);
 226
 227                if (++i >= 16)
 228                        break;
 229
 230                gf128mul_x8_bbe(r);
 231        }
 232}
 233EXPORT_SYMBOL(gf128mul_bbe);
 234
 235/*      This version uses 64k bytes of table space.
 236    A 16 byte buffer has to be multiplied by a 16 byte key
 237    value in GF(2^128).  If we consider a GF(2^128) value in
 238    the buffer's lowest byte, we can construct a table of
 239    the 256 16 byte values that result from the 256 values
 240    of this byte.  This requires 4096 bytes. But we also
 241    need tables for each of the 16 higher bytes in the
 242    buffer as well, which makes 64 kbytes in total.
 243*/
 244/* additional explanation
 245 * t[0][BYTE] contains g*BYTE
 246 * t[1][BYTE] contains g*x^8*BYTE
 247 *  ..
 248 * t[15][BYTE] contains g*x^120*BYTE */
 249struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g)
 250{
 251        struct gf128mul_64k *t;
 252        int i, j, k;
 253
 254        t = kzalloc(sizeof(*t), GFP_KERNEL);
 255        if (!t)
 256                goto out;
 257
 258        for (i = 0; i < 16; i++) {
 259                t->t[i] = kzalloc(sizeof(*t->t[i]), GFP_KERNEL);
 260                if (!t->t[i]) {
 261                        gf128mul_free_64k(t);
 262                        t = NULL;
 263                        goto out;
 264                }
 265        }
 266
 267        t->t[0]->t[1] = *g;
 268        for (j = 1; j <= 64; j <<= 1)
 269                gf128mul_x_bbe(&t->t[0]->t[j + j], &t->t[0]->t[j]);
 270
 271        for (i = 0;;) {
 272                for (j = 2; j < 256; j += j)
 273                        for (k = 1; k < j; ++k)
 274                                be128_xor(&t->t[i]->t[j + k],
 275                                          &t->t[i]->t[j], &t->t[i]->t[k]);
 276
 277                if (++i >= 16)
 278                        break;
 279
 280                for (j = 128; j > 0; j >>= 1) {
 281                        t->t[i]->t[j] = t->t[i - 1]->t[j];
 282                        gf128mul_x8_bbe(&t->t[i]->t[j]);
 283                }
 284        }
 285
 286out:
 287        return t;
 288}
 289EXPORT_SYMBOL(gf128mul_init_64k_bbe);
 290
 291void gf128mul_free_64k(struct gf128mul_64k *t)
 292{
 293        int i;
 294
 295        for (i = 0; i < 16; i++)
 296                kzfree(t->t[i]);
 297        kzfree(t);
 298}
 299EXPORT_SYMBOL(gf128mul_free_64k);
 300
 301void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t)
 302{
 303        u8 *ap = (u8 *)a;
 304        be128 r[1];
 305        int i;
 306
 307        *r = t->t[0]->t[ap[15]];
 308        for (i = 1; i < 16; ++i)
 309                be128_xor(r, r, &t->t[i]->t[ap[15 - i]]);
 310        *a = *r;
 311}
 312EXPORT_SYMBOL(gf128mul_64k_bbe);
 313
 314/*      This version uses 4k bytes of table space.
 315    A 16 byte buffer has to be multiplied by a 16 byte key
 316    value in GF(2^128).  If we consider a GF(2^128) value in a
 317    single byte, we can construct a table of the 256 16 byte
 318    values that result from the 256 values of this byte.
 319    This requires 4096 bytes. If we take the highest byte in
 320    the buffer and use this table to get the result, we then
 321    have to multiply by x^120 to get the final value. For the
 322    next highest byte the result has to be multiplied by x^112
 323    and so on. But we can do this by accumulating the result
 324    in an accumulator starting with the result for the top
 325    byte.  We repeatedly multiply the accumulator value by
 326    x^8 and then add in (i.e. xor) the 16 bytes of the next
 327    lower byte in the buffer, stopping when we reach the
 328    lowest byte. This requires a 4096 byte table.
 329*/
 330struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g)
 331{
 332        struct gf128mul_4k *t;
 333        int j, k;
 334
 335        t = kzalloc(sizeof(*t), GFP_KERNEL);
 336        if (!t)
 337                goto out;
 338
 339        t->t[128] = *g;
 340        for (j = 64; j > 0; j >>= 1)
 341                gf128mul_x_lle(&t->t[j], &t->t[j+j]);
 342
 343        for (j = 2; j < 256; j += j)
 344                for (k = 1; k < j; ++k)
 345                        be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
 346
 347out:
 348        return t;
 349}
 350EXPORT_SYMBOL(gf128mul_init_4k_lle);
 351
 352struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g)
 353{
 354        struct gf128mul_4k *t;
 355        int j, k;
 356
 357        t = kzalloc(sizeof(*t), GFP_KERNEL);
 358        if (!t)
 359                goto out;
 360
 361        t->t[1] = *g;
 362        for (j = 1; j <= 64; j <<= 1)
 363                gf128mul_x_bbe(&t->t[j + j], &t->t[j]);
 364
 365        for (j = 2; j < 256; j += j)
 366                for (k = 1; k < j; ++k)
 367                        be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
 368
 369out:
 370        return t;
 371}
 372EXPORT_SYMBOL(gf128mul_init_4k_bbe);
 373
 374void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t)
 375{
 376        u8 *ap = (u8 *)a;
 377        be128 r[1];
 378        int i = 15;
 379
 380        *r = t->t[ap[15]];
 381        while (i--) {
 382                gf128mul_x8_lle(r);
 383                be128_xor(r, r, &t->t[ap[i]]);
 384        }
 385        *a = *r;
 386}
 387EXPORT_SYMBOL(gf128mul_4k_lle);
 388
 389void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t)
 390{
 391        u8 *ap = (u8 *)a;
 392        be128 r[1];
 393        int i = 0;
 394
 395        *r = t->t[ap[0]];
 396        while (++i < 16) {
 397                gf128mul_x8_bbe(r);
 398                be128_xor(r, r, &t->t[ap[i]]);
 399        }
 400        *a = *r;
 401}
 402EXPORT_SYMBOL(gf128mul_4k_bbe);
 403
 404MODULE_LICENSE("GPL");
 405MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");
 406