/* linux/arch/x86/crypto/des3_ede-asm_64.S */
/*
 * des3_ede-asm_64.S  -  x86-64 assembly implementation of 3DES cipher
 *
 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

  17#include <linux/linkage.h>
  18
  19.file "des3_ede-asm_64.S"
  20.text
  21
  22#define s1 .L_s1
  23#define s2 ((s1) + (64*8))
  24#define s3 ((s2) + (64*8))
  25#define s4 ((s3) + (64*8))
  26#define s5 ((s4) + (64*8))
  27#define s6 ((s5) + (64*8))
  28#define s7 ((s6) + (64*8))
  29#define s8 ((s7) + (64*8))
  30
  31/* register macros */
  32#define CTX %rdi
  33
  34#define RL0 %r8
  35#define RL1 %r9
  36#define RL2 %r10
  37
  38#define RL0d %r8d
  39#define RL1d %r9d
  40#define RL2d %r10d
  41
  42#define RR0 %r11
  43#define RR1 %r12
  44#define RR2 %r13
  45
  46#define RR0d %r11d
  47#define RR1d %r12d
  48#define RR2d %r13d
  49
  50#define RW0 %rax
  51#define RW1 %rbx
  52#define RW2 %rcx
  53
  54#define RW0d %eax
  55#define RW1d %ebx
  56#define RW2d %ecx
  57
  58#define RW0bl %al
  59#define RW1bl %bl
  60#define RW2bl %cl
  61
  62#define RW0bh %ah
  63#define RW1bh %bh
  64#define RW2bh %ch
  65
  66#define RT0 %r15
  67#define RT1 %rsi
  68#define RT2 %r14
  69#define RT3 %rdx
  70
  71#define RT0d %r15d
  72#define RT1d %esi
  73#define RT2d %r14d
  74#define RT3d %edx
  75
  76/***********************************************************************
  77 * 1-way 3DES
  78 ***********************************************************************/
  79#define do_permutation(a, b, offset, mask) \
  80        movl a, RT0d; \
  81        shrl $(offset), RT0d; \
  82        xorl b, RT0d; \
  83        andl $(mask), RT0d; \
  84        xorl RT0d, b; \
  85        shll $(offset), RT0d; \
  86        xorl RT0d, a;
  87
  88#define expand_to_64bits(val, mask) \
  89        movl val##d, RT0d; \
  90        rorl $4, RT0d; \
  91        shlq $32, RT0; \
  92        orq RT0, val; \
  93        andq mask, val;
  94
  95#define compress_to_64bits(val) \
  96        movq val, RT0; \
  97        shrq $32, RT0; \
  98        roll $4, RT0d; \
  99        orl RT0d, val##d;
 100
 101#define initial_permutation(left, right) \
 102        do_permutation(left##d, right##d,  4, 0x0f0f0f0f); \
 103        do_permutation(left##d, right##d, 16, 0x0000ffff); \
 104        do_permutation(right##d, left##d,  2, 0x33333333); \
 105        do_permutation(right##d, left##d,  8, 0x00ff00ff); \
 106        movabs $0x3f3f3f3f3f3f3f3f, RT3; \
 107        movl left##d, RW0d; \
 108        roll $1, right##d; \
 109        xorl right##d, RW0d; \
 110        andl $0xaaaaaaaa, RW0d; \
 111        xorl RW0d, left##d; \
 112        xorl RW0d, right##d; \
 113        roll $1, left##d; \
 114        expand_to_64bits(right, RT3); \
 115        expand_to_64bits(left, RT3);
 116
 117#define final_permutation(left, right) \
 118        compress_to_64bits(right); \
 119        compress_to_64bits(left); \
 120        movl right##d, RW0d; \
 121        rorl $1, left##d; \
 122        xorl left##d, RW0d; \
 123        andl $0xaaaaaaaa, RW0d; \
 124        xorl RW0d, right##d; \
 125        xorl RW0d, left##d; \
 126        rorl $1, right##d; \
 127        do_permutation(right##d, left##d,  8, 0x00ff00ff); \
 128        do_permutation(right##d, left##d,  2, 0x33333333); \
 129        do_permutation(left##d, right##d, 16, 0x0000ffff); \
 130        do_permutation(left##d, right##d,  4, 0x0f0f0f0f);
 131
 132#define round1(n, from, to, load_next_key) \
 133        xorq from, RW0; \
 134        \
 135        movzbl RW0bl, RT0d; \
 136        movzbl RW0bh, RT1d; \
 137        shrq $16, RW0; \
 138        movzbl RW0bl, RT2d; \
 139        movzbl RW0bh, RT3d; \
 140        shrq $16, RW0; \
 141        movq s8(, RT0, 8), RT0; \
 142        xorq s6(, RT1, 8), to; \
 143        movzbl RW0bl, RL1d; \
 144        movzbl RW0bh, RT1d; \
 145        shrl $16, RW0d; \
 146        xorq s4(, RT2, 8), RT0; \
 147        xorq s2(, RT3, 8), to; \
 148        movzbl RW0bl, RT2d; \
 149        movzbl RW0bh, RT3d; \
 150        xorq s7(, RL1, 8), RT0; \
 151        xorq s5(, RT1, 8), to; \
 152        xorq s3(, RT2, 8), RT0; \
 153        load_next_key(n, RW0); \
 154        xorq RT0, to; \
 155        xorq s1(, RT3, 8), to; \
 156
 157#define load_next_key(n, RWx) \
 158        movq (((n) + 1) * 8)(CTX), RWx;
 159
 160#define dummy2(a, b) /*_*/
 161
 162#define read_block(io, left, right) \
 163        movl    (io), left##d; \
 164        movl   4(io), right##d; \
 165        bswapl left##d; \
 166        bswapl right##d;
 167
 168#define write_block(io, left, right) \
 169        bswapl left##d; \
 170        bswapl right##d; \
 171        movl   left##d,   (io); \
 172        movl   right##d, 4(io);
 173
 174ENTRY(des3_ede_x86_64_crypt_blk)
 175        /* input:
 176         *      %rdi: round keys, CTX
 177         *      %rsi: dst
 178         *      %rdx: src
 179         */
 180        pushq %rbx;
 181        pushq %r12;
 182        pushq %r13;
 183        pushq %r14;
 184        pushq %r15;
 185
 186        pushq %rsi; /* dst */
 187
 188        read_block(%rdx, RL0, RR0);
 189        initial_permutation(RL0, RR0);
 190
 191        movq (CTX), RW0;
 192
 193        round1(0, RR0, RL0, load_next_key);
 194        round1(1, RL0, RR0, load_next_key);
 195        round1(2, RR0, RL0, load_next_key);
 196        round1(3, RL0, RR0, load_next_key);
 197        round1(4, RR0, RL0, load_next_key);
 198        round1(5, RL0, RR0, load_next_key);
 199        round1(6, RR0, RL0, load_next_key);
 200        round1(7, RL0, RR0, load_next_key);
 201        round1(8, RR0, RL0, load_next_key);
 202        round1(9, RL0, RR0, load_next_key);
 203        round1(10, RR0, RL0, load_next_key);
 204        round1(11, RL0, RR0, load_next_key);
 205        round1(12, RR0, RL0, load_next_key);
 206        round1(13, RL0, RR0, load_next_key);
 207        round1(14, RR0, RL0, load_next_key);
 208        round1(15, RL0, RR0, load_next_key);
 209
 210        round1(16+0, RL0, RR0, load_next_key);
 211        round1(16+1, RR0, RL0, load_next_key);
 212        round1(16+2, RL0, RR0, load_next_key);
 213        round1(16+3, RR0, RL0, load_next_key);
 214        round1(16+4, RL0, RR0, load_next_key);
 215        round1(16+5, RR0, RL0, load_next_key);
 216        round1(16+6, RL0, RR0, load_next_key);
 217        round1(16+7, RR0, RL0, load_next_key);
 218        round1(16+8, RL0, RR0, load_next_key);
 219        round1(16+9, RR0, RL0, load_next_key);
 220        round1(16+10, RL0, RR0, load_next_key);
 221        round1(16+11, RR0, RL0, load_next_key);
 222        round1(16+12, RL0, RR0, load_next_key);
 223        round1(16+13, RR0, RL0, load_next_key);
 224        round1(16+14, RL0, RR0, load_next_key);
 225        round1(16+15, RR0, RL0, load_next_key);
 226
 227        round1(32+0, RR0, RL0, load_next_key);
 228        round1(32+1, RL0, RR0, load_next_key);
 229        round1(32+2, RR0, RL0, load_next_key);
 230        round1(32+3, RL0, RR0, load_next_key);
 231        round1(32+4, RR0, RL0, load_next_key);
 232        round1(32+5, RL0, RR0, load_next_key);
 233        round1(32+6, RR0, RL0, load_next_key);
 234        round1(32+7, RL0, RR0, load_next_key);
 235        round1(32+8, RR0, RL0, load_next_key);
 236        round1(32+9, RL0, RR0, load_next_key);
 237        round1(32+10, RR0, RL0, load_next_key);
 238        round1(32+11, RL0, RR0, load_next_key);
 239        round1(32+12, RR0, RL0, load_next_key);
 240        round1(32+13, RL0, RR0, load_next_key);
 241        round1(32+14, RR0, RL0, load_next_key);
 242        round1(32+15, RL0, RR0, dummy2);
 243
 244        final_permutation(RR0, RL0);
 245
 246        popq %rsi /* dst */
 247        write_block(%rsi, RR0, RL0);
 248
 249        popq %r15;
 250        popq %r14;
 251        popq %r13;
 252        popq %r12;
 253        popq %rbx;
 254
 255        ret;
 256ENDPROC(des3_ede_x86_64_crypt_blk)
 257
 258/***********************************************************************
 259 * 3-way 3DES
 260 ***********************************************************************/
 261#define expand_to_64bits(val, mask) \
 262        movl val##d, RT0d; \
 263        rorl $4, RT0d; \
 264        shlq $32, RT0; \
 265        orq RT0, val; \
 266        andq mask, val;
 267
 268#define compress_to_64bits(val) \
 269        movq val, RT0; \
 270        shrq $32, RT0; \
 271        roll $4, RT0d; \
 272        orl RT0d, val##d;
 273
 274#define initial_permutation3(left, right) \
 275        do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
 276        do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
 277          do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
 278          do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
 279            do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f); \
 280            do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
 281            \
 282        do_permutation(right##0d, left##0d,  2, 0x33333333); \
 283        do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
 284          do_permutation(right##1d, left##1d,  2, 0x33333333); \
 285          do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
 286            do_permutation(right##2d, left##2d,  2, 0x33333333); \
 287            do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
 288            \
 289        movabs $0x3f3f3f3f3f3f3f3f, RT3; \
 290            \
 291        movl left##0d, RW0d; \
 292        roll $1, right##0d; \
 293        xorl right##0d, RW0d; \
 294        andl $0xaaaaaaaa, RW0d; \
 295        xorl RW0d, left##0d; \
 296        xorl RW0d, right##0d; \
 297        roll $1, left##0d; \
 298        expand_to_64bits(right##0, RT3); \
 299        expand_to_64bits(left##0, RT3); \
 300          movl left##1d, RW1d; \
 301          roll $1, right##1d; \
 302          xorl right##1d, RW1d; \
 303          andl $0xaaaaaaaa, RW1d; \
 304          xorl RW1d, left##1d; \
 305          xorl RW1d, right##1d; \
 306          roll $1, left##1d; \
 307          expand_to_64bits(right##1, RT3); \
 308          expand_to_64bits(left##1, RT3); \
 309            movl left##2d, RW2d; \
 310            roll $1, right##2d; \
 311            xorl right##2d, RW2d; \
 312            andl $0xaaaaaaaa, RW2d; \
 313            xorl RW2d, left##2d; \
 314            xorl RW2d, right##2d; \
 315            roll $1, left##2d; \
 316            expand_to_64bits(right##2, RT3); \
 317            expand_to_64bits(left##2, RT3);
 318
 319#define final_permutation3(left, right) \
 320        compress_to_64bits(right##0); \
 321        compress_to_64bits(left##0); \
 322        movl right##0d, RW0d; \
 323        rorl $1, left##0d; \
 324        xorl left##0d, RW0d; \
 325        andl $0xaaaaaaaa, RW0d; \
 326        xorl RW0d, right##0d; \
 327        xorl RW0d, left##0d; \
 328        rorl $1, right##0d; \
 329          compress_to_64bits(right##1); \
 330          compress_to_64bits(left##1); \
 331          movl right##1d, RW1d; \
 332          rorl $1, left##1d; \
 333          xorl left##1d, RW1d; \
 334          andl $0xaaaaaaaa, RW1d; \
 335          xorl RW1d, right##1d; \
 336          xorl RW1d, left##1d; \
 337          rorl $1, right##1d; \
 338            compress_to_64bits(right##2); \
 339            compress_to_64bits(left##2); \
 340            movl right##2d, RW2d; \
 341            rorl $1, left##2d; \
 342            xorl left##2d, RW2d; \
 343            andl $0xaaaaaaaa, RW2d; \
 344            xorl RW2d, right##2d; \
 345            xorl RW2d, left##2d; \
 346            rorl $1, right##2d; \
 347            \
 348        do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
 349        do_permutation(right##0d, left##0d,  2, 0x33333333); \
 350          do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
 351          do_permutation(right##1d, left##1d,  2, 0x33333333); \
 352            do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
 353            do_permutation(right##2d, left##2d,  2, 0x33333333); \
 354            \
 355        do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
 356        do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
 357          do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
 358          do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
 359            do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
 360            do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f);
 361
 362#define round3(n, from, to, load_next_key, do_movq) \
 363        xorq from##0, RW0; \
 364        movzbl RW0bl, RT3d; \
 365        movzbl RW0bh, RT1d; \
 366        shrq $16, RW0; \
 367        xorq s8(, RT3, 8), to##0; \
 368        xorq s6(, RT1, 8), to##0; \
 369        movzbl RW0bl, RT3d; \
 370        movzbl RW0bh, RT1d; \
 371        shrq $16, RW0; \
 372        xorq s4(, RT3, 8), to##0; \
 373        xorq s2(, RT1, 8), to##0; \
 374        movzbl RW0bl, RT3d; \
 375        movzbl RW0bh, RT1d; \
 376        shrl $16, RW0d; \
 377        xorq s7(, RT3, 8), to##0; \
 378        xorq s5(, RT1, 8), to##0; \
 379        movzbl RW0bl, RT3d; \
 380        movzbl RW0bh, RT1d; \
 381        load_next_key(n, RW0); \
 382        xorq s3(, RT3, 8), to##0; \
 383        xorq s1(, RT1, 8), to##0; \
 384                xorq from##1, RW1; \
 385                movzbl RW1bl, RT3d; \
 386                movzbl RW1bh, RT1d; \
 387                shrq $16, RW1; \
 388                xorq s8(, RT3, 8), to##1; \
 389                xorq s6(, RT1, 8), to##1; \
 390                movzbl RW1bl, RT3d; \
 391                movzbl RW1bh, RT1d; \
 392                shrq $16, RW1; \
 393                xorq s4(, RT3, 8), to##1; \
 394                xorq s2(, RT1, 8), to##1; \
 395                movzbl RW1bl, RT3d; \
 396                movzbl RW1bh, RT1d; \
 397                shrl $16, RW1d; \
 398                xorq s7(, RT3, 8), to##1; \
 399                xorq s5(, RT1, 8), to##1; \
 400                movzbl RW1bl, RT3d; \
 401                movzbl RW1bh, RT1d; \
 402                do_movq(RW0, RW1); \
 403                xorq s3(, RT3, 8), to##1; \
 404                xorq s1(, RT1, 8), to##1; \
 405                        xorq from##2, RW2; \
 406                        movzbl RW2bl, RT3d; \
 407                        movzbl RW2bh, RT1d; \
 408                        shrq $16, RW2; \
 409                        xorq s8(, RT3, 8), to##2; \
 410                        xorq s6(, RT1, 8), to##2; \
 411                        movzbl RW2bl, RT3d; \
 412                        movzbl RW2bh, RT1d; \
 413                        shrq $16, RW2; \
 414                        xorq s4(, RT3, 8), to##2; \
 415                        xorq s2(, RT1, 8), to##2; \
 416                        movzbl RW2bl, RT3d; \
 417                        movzbl RW2bh, RT1d; \
 418                        shrl $16, RW2d; \
 419                        xorq s7(, RT3, 8), to##2; \
 420                        xorq s5(, RT1, 8), to##2; \
 421                        movzbl RW2bl, RT3d; \
 422                        movzbl RW2bh, RT1d; \
 423                        do_movq(RW0, RW2); \
 424                        xorq s3(, RT3, 8), to##2; \
 425                        xorq s1(, RT1, 8), to##2;
 426
 427#define __movq(src, dst) \
 428        movq src, dst;
 429
 430ENTRY(des3_ede_x86_64_crypt_blk_3way)
 431        /* input:
 432         *      %rdi: ctx, round keys
 433         *      %rsi: dst (3 blocks)
 434         *      %rdx: src (3 blocks)
 435         */
 436
 437        pushq %rbx;
 438        pushq %r12;
 439        pushq %r13;
 440        pushq %r14;
 441        pushq %r15;
 442
 443        pushq %rsi /* dst */
 444
 445        /* load input */
 446        movl 0 * 4(%rdx), RL0d;
 447        movl 1 * 4(%rdx), RR0d;
 448        movl 2 * 4(%rdx), RL1d;
 449        movl 3 * 4(%rdx), RR1d;
 450        movl 4 * 4(%rdx), RL2d;
 451        movl 5 * 4(%rdx), RR2d;
 452
 453        bswapl RL0d;
 454        bswapl RR0d;
 455        bswapl RL1d;
 456        bswapl RR1d;
 457        bswapl RL2d;
 458        bswapl RR2d;
 459
 460        initial_permutation3(RL, RR);
 461
 462        movq 0(CTX), RW0;
 463        movq RW0, RW1;
 464        movq RW0, RW2;
 465
 466        round3(0, RR, RL, load_next_key, __movq);
 467        round3(1, RL, RR, load_next_key, __movq);
 468        round3(2, RR, RL, load_next_key, __movq);
 469        round3(3, RL, RR, load_next_key, __movq);
 470        round3(4, RR, RL, load_next_key, __movq);
 471        round3(5, RL, RR, load_next_key, __movq);
 472        round3(6, RR, RL, load_next_key, __movq);
 473        round3(7, RL, RR, load_next_key, __movq);
 474        round3(8, RR, RL, load_next_key, __movq);
 475        round3(9, RL, RR, load_next_key, __movq);
 476        round3(10, RR, RL, load_next_key, __movq);
 477        round3(11, RL, RR, load_next_key, __movq);
 478        round3(12, RR, RL, load_next_key, __movq);
 479        round3(13, RL, RR, load_next_key, __movq);
 480        round3(14, RR, RL, load_next_key, __movq);
 481        round3(15, RL, RR, load_next_key, __movq);
 482
 483        round3(16+0, RL, RR, load_next_key, __movq);
 484        round3(16+1, RR, RL, load_next_key, __movq);
 485        round3(16+2, RL, RR, load_next_key, __movq);
 486        round3(16+3, RR, RL, load_next_key, __movq);
 487        round3(16+4, RL, RR, load_next_key, __movq);
 488        round3(16+5, RR, RL, load_next_key, __movq);
 489        round3(16+6, RL, RR, load_next_key, __movq);
 490        round3(16+7, RR, RL, load_next_key, __movq);
 491        round3(16+8, RL, RR, load_next_key, __movq);
 492        round3(16+9, RR, RL, load_next_key, __movq);
 493        round3(16+10, RL, RR, load_next_key, __movq);
 494        round3(16+11, RR, RL, load_next_key, __movq);
 495        round3(16+12, RL, RR, load_next_key, __movq);
 496        round3(16+13, RR, RL, load_next_key, __movq);
 497        round3(16+14, RL, RR, load_next_key, __movq);
 498        round3(16+15, RR, RL, load_next_key, __movq);
 499
 500        round3(32+0, RR, RL, load_next_key, __movq);
 501        round3(32+1, RL, RR, load_next_key, __movq);
 502        round3(32+2, RR, RL, load_next_key, __movq);
 503        round3(32+3, RL, RR, load_next_key, __movq);
 504        round3(32+4, RR, RL, load_next_key, __movq);
 505        round3(32+5, RL, RR, load_next_key, __movq);
 506        round3(32+6, RR, RL, load_next_key, __movq);
 507        round3(32+7, RL, RR, load_next_key, __movq);
 508        round3(32+8, RR, RL, load_next_key, __movq);
 509        round3(32+9, RL, RR, load_next_key, __movq);
 510        round3(32+10, RR, RL, load_next_key, __movq);
 511        round3(32+11, RL, RR, load_next_key, __movq);
 512        round3(32+12, RR, RL, load_next_key, __movq);
 513        round3(32+13, RL, RR, load_next_key, __movq);
 514        round3(32+14, RR, RL, load_next_key, __movq);
 515        round3(32+15, RL, RR, dummy2, dummy2);
 516
 517        final_permutation3(RR, RL);
 518
 519        bswapl RR0d;
 520        bswapl RL0d;
 521        bswapl RR1d;
 522        bswapl RL1d;
 523        bswapl RR2d;
 524        bswapl RL2d;
 525
 526        popq %rsi /* dst */
 527        movl RR0d, 0 * 4(%rsi);
 528        movl RL0d, 1 * 4(%rsi);
 529        movl RR1d, 2 * 4(%rsi);
 530        movl RL1d, 3 * 4(%rsi);
 531        movl RR2d, 4 * 4(%rsi);
 532        movl RL2d, 5 * 4(%rsi);
 533
 534        popq %r15;
 535        popq %r14;
 536        popq %r13;
 537        popq %r12;
 538        popq %rbx;
 539
 540        ret;
 541ENDPROC(des3_ede_x86_64_crypt_blk_3way)
 542
 543.section        .rodata, "a", @progbits
 544.align 16
 545.L_s1:
 546        .quad 0x0010100001010400, 0x0000000000000000
 547        .quad 0x0000100000010000, 0x0010100001010404
 548        .quad 0x0010100001010004, 0x0000100000010404
 549        .quad 0x0000000000000004, 0x0000100000010000
 550        .quad 0x0000000000000400, 0x0010100001010400
 551        .quad 0x0010100001010404, 0x0000000000000400
 552        .quad 0x0010000001000404, 0x0010100001010004
 553        .quad 0x0010000001000000, 0x0000000000000004
 554        .quad 0x0000000000000404, 0x0010000001000400
 555        .quad 0x0010000001000400, 0x0000100000010400
 556        .quad 0x0000100000010400, 0x0010100001010000
 557        .quad 0x0010100001010000, 0x0010000001000404
 558        .quad 0x0000100000010004, 0x0010000001000004
 559        .quad 0x0010000001000004, 0x0000100000010004
 560        .quad 0x0000000000000000, 0x0000000000000404
 561        .quad 0x0000100000010404, 0x0010000001000000
 562        .quad 0x0000100000010000, 0x0010100001010404
 563        .quad 0x0000000000000004, 0x0010100001010000
 564        .quad 0x0010100001010400, 0x0010000001000000
 565        .quad 0x0010000001000000, 0x0000000000000400
 566        .quad 0x0010100001010004, 0x0000100000010000
 567        .quad 0x0000100000010400, 0x0010000001000004
 568        .quad 0x0000000000000400, 0x0000000000000004
 569        .quad 0x0010000001000404, 0x0000100000010404
 570        .quad 0x0010100001010404, 0x0000100000010004
 571        .quad 0x0010100001010000, 0x0010000001000404
 572        .quad 0x0010000001000004, 0x0000000000000404
 573        .quad 0x0000100000010404, 0x0010100001010400
 574        .quad 0x0000000000000404, 0x0010000001000400
 575        .quad 0x0010000001000400, 0x0000000000000000
 576        .quad 0x0000100000010004, 0x0000100000010400
 577        .quad 0x0000000000000000, 0x0010100001010004
 578.L_s2:
 579        .quad 0x0801080200100020, 0x0800080000000000
 580        .quad 0x0000080000000000, 0x0001080200100020
 581        .quad 0x0001000000100000, 0x0000000200000020
 582        .quad 0x0801000200100020, 0x0800080200000020
 583        .quad 0x0800000200000020, 0x0801080200100020
 584        .quad 0x0801080000100000, 0x0800000000000000
 585        .quad 0x0800080000000000, 0x0001000000100000
 586        .quad 0x0000000200000020, 0x0801000200100020
 587        .quad 0x0001080000100000, 0x0001000200100020
 588        .quad 0x0800080200000020, 0x0000000000000000
 589        .quad 0x0800000000000000, 0x0000080000000000
 590        .quad 0x0001080200100020, 0x0801000000100000
 591        .quad 0x0001000200100020, 0x0800000200000020
 592        .quad 0x0000000000000000, 0x0001080000100000
 593        .quad 0x0000080200000020, 0x0801080000100000
 594        .quad 0x0801000000100000, 0x0000080200000020
 595        .quad 0x0000000000000000, 0x0001080200100020
 596        .quad 0x0801000200100020, 0x0001000000100000
 597        .quad 0x0800080200000020, 0x0801000000100000
 598        .quad 0x0801080000100000, 0x0000080000000000
 599        .quad 0x0801000000100000, 0x0800080000000000
 600        .quad 0x0000000200000020, 0x0801080200100020
 601        .quad 0x0001080200100020, 0x0000000200000020
 602        .quad 0x0000080000000000, 0x0800000000000000
 603        .quad 0x0000080200000020, 0x0801080000100000
 604        .quad 0x0001000000100000, 0x0800000200000020
 605        .quad 0x0001000200100020, 0x0800080200000020
 606        .quad 0x0800000200000020, 0x0001000200100020
 607        .quad 0x0001080000100000, 0x0000000000000000
 608        .quad 0x0800080000000000, 0x0000080200000020
 609        .quad 0x0800000000000000, 0x0801000200100020
 610        .quad 0x0801080200100020, 0x0001080000100000
 611.L_s3:
 612        .quad 0x0000002000000208, 0x0000202008020200
 613        .quad 0x0000000000000000, 0x0000200008020008
 614        .quad 0x0000002008000200, 0x0000000000000000
 615        .quad 0x0000202000020208, 0x0000002008000200
 616        .quad 0x0000200000020008, 0x0000000008000008
 617        .quad 0x0000000008000008, 0x0000200000020000
 618        .quad 0x0000202008020208, 0x0000200000020008
 619        .quad 0x0000200008020000, 0x0000002000000208
 620        .quad 0x0000000008000000, 0x0000000000000008
 621        .quad 0x0000202008020200, 0x0000002000000200
 622        .quad 0x0000202000020200, 0x0000200008020000
 623        .quad 0x0000200008020008, 0x0000202000020208
 624        .quad 0x0000002008000208, 0x0000202000020200
 625        .quad 0x0000200000020000, 0x0000002008000208
 626        .quad 0x0000000000000008, 0x0000202008020208
 627        .quad 0x0000002000000200, 0x0000000008000000
 628        .quad 0x0000202008020200, 0x0000000008000000
 629        .quad 0x0000200000020008, 0x0000002000000208
 630        .quad 0x0000200000020000, 0x0000202008020200
 631        .quad 0x0000002008000200, 0x0000000000000000
 632        .quad 0x0000002000000200, 0x0000200000020008
 633        .quad 0x0000202008020208, 0x0000002008000200
 634        .quad 0x0000000008000008, 0x0000002000000200
 635        .quad 0x0000000000000000, 0x0000200008020008
 636        .quad 0x0000002008000208, 0x0000200000020000
 637        .quad 0x0000000008000000, 0x0000202008020208
 638        .quad 0x0000000000000008, 0x0000202000020208
 639        .quad 0x0000202000020200, 0x0000000008000008
 640        .quad 0x0000200008020000, 0x0000002008000208
 641        .quad 0x0000002000000208, 0x0000200008020000
 642        .quad 0x0000202000020208, 0x0000000000000008
 643        .quad 0x0000200008020008, 0x0000202000020200
 644.L_s4:
 645        .quad 0x1008020000002001, 0x1000020800002001
 646        .quad 0x1000020800002001, 0x0000000800000000
 647        .quad 0x0008020800002000, 0x1008000800000001
 648        .quad 0x1008000000000001, 0x1000020000002001
 649        .quad 0x0000000000000000, 0x0008020000002000
 650        .quad 0x0008020000002000, 0x1008020800002001
 651        .quad 0x1000000800000001, 0x0000000000000000
 652        .quad 0x0008000800000000, 0x1008000000000001
 653        .quad 0x1000000000000001, 0x0000020000002000
 654        .quad 0x0008000000000000, 0x1008020000002001
 655        .quad 0x0000000800000000, 0x0008000000000000
 656        .quad 0x1000020000002001, 0x0000020800002000
 657        .quad 0x1008000800000001, 0x1000000000000001
 658        .quad 0x0000020800002000, 0x0008000800000000
 659        .quad 0x0000020000002000, 0x0008020800002000
 660        .quad 0x1008020800002001, 0x1000000800000001
 661        .quad 0x0008000800000000, 0x1008000000000001
 662        .quad 0x0008020000002000, 0x1008020800002001
 663        .quad 0x1000000800000001, 0x0000000000000000
 664        .quad 0x0000000000000000, 0x0008020000002000
 665        .quad 0x0000020800002000, 0x0008000800000000
 666        .quad 0x1008000800000001, 0x1000000000000001
 667        .quad 0x1008020000002001, 0x1000020800002001
 668        .quad 0x1000020800002001, 0x0000000800000000
 669        .quad 0x1008020800002001, 0x1000000800000001
 670        .quad 0x1000000000000001, 0x0000020000002000
 671        .quad 0x1008000000000001, 0x1000020000002001
 672        .quad 0x0008020800002000, 0x1008000800000001
 673        .quad 0x1000020000002001, 0x0000020800002000
 674        .quad 0x0008000000000000, 0x1008020000002001
 675        .quad 0x0000000800000000, 0x0008000000000000
 676        .quad 0x0000020000002000, 0x0008020800002000
 677.L_s5:
 678        .quad 0x0000001000000100, 0x0020001002080100
 679        .quad 0x0020000002080000, 0x0420001002000100
 680        .quad 0x0000000000080000, 0x0000001000000100
 681        .quad 0x0400000000000000, 0x0020000002080000
 682        .quad 0x0400001000080100, 0x0000000000080000
 683        .quad 0x0020001002000100, 0x0400001000080100
 684        .quad 0x0420001002000100, 0x0420000002080000
 685        .quad 0x0000001000080100, 0x0400000000000000
 686        .quad 0x0020000002000000, 0x0400000000080000
 687        .quad 0x0400000000080000, 0x0000000000000000
 688        .quad 0x0400001000000100, 0x0420001002080100
 689        .quad 0x0420001002080100, 0x0020001002000100
 690        .quad 0x0420000002080000, 0x0400001000000100
 691        .quad 0x0000000000000000, 0x0420000002000000
 692        .quad 0x0020001002080100, 0x0020000002000000
 693        .quad 0x0420000002000000, 0x0000001000080100
 694        .quad 0x0000000000080000, 0x0420001002000100
 695        .quad 0x0000001000000100, 0x0020000002000000
 696        .quad 0x0400000000000000, 0x0020000002080000
 697        .quad 0x0420001002000100, 0x0400001000080100
 698        .quad 0x0020001002000100, 0x0400000000000000
 699        .quad 0x0420000002080000, 0x0020001002080100
 700        .quad 0x0400001000080100, 0x0000001000000100
 701        .quad 0x0020000002000000, 0x0420000002080000
 702        .quad 0x0420001002080100, 0x0000001000080100
 703        .quad 0x0420000002000000, 0x0420001002080100
 704        .quad 0x0020000002080000, 0x0000000000000000
 705        .quad 0x0400000000080000, 0x0420000002000000
 706        .quad 0x0000001000080100, 0x0020001002000100
 707        .quad 0x0400001000000100, 0x0000000000080000
 708        .quad 0x0000000000000000, 0x0400000000080000
 709        .quad 0x0020001002080100, 0x0400001000000100
 710.L_s6:
 711        .quad 0x0200000120000010, 0x0204000020000000
 712        .quad 0x0000040000000000, 0x0204040120000010
 713        .quad 0x0204000020000000, 0x0000000100000010
 714        .quad 0x0204040120000010, 0x0004000000000000
 715        .quad 0x0200040020000000, 0x0004040100000010
 716        .quad 0x0004000000000000, 0x0200000120000010
 717        .quad 0x0004000100000010, 0x0200040020000000
 718        .quad 0x0200000020000000, 0x0000040100000010
 719        .quad 0x0000000000000000, 0x0004000100000010
 720        .quad 0x0200040120000010, 0x0000040000000000
 721        .quad 0x0004040000000000, 0x0200040120000010
 722        .quad 0x0000000100000010, 0x0204000120000010
 723        .quad 0x0204000120000010, 0x0000000000000000
 724        .quad 0x0004040100000010, 0x0204040020000000
 725        .quad 0x0000040100000010, 0x0004040000000000
 726        .quad 0x0204040020000000, 0x0200000020000000
 727        .quad 0x0200040020000000, 0x0000000100000010
 728        .quad 0x0204000120000010, 0x0004040000000000
 729        .quad 0x0204040120000010, 0x0004000000000000
 730        .quad 0x0000040100000010, 0x0200000120000010
 731        .quad 0x0004000000000000, 0x0200040020000000
 732        .quad 0x0200000020000000, 0x0000040100000010
 733        .quad 0x0200000120000010, 0x0204040120000010
 734        .quad 0x0004040000000000, 0x0204000020000000
 735        .quad 0x0004040100000010, 0x0204040020000000
 736        .quad 0x0000000000000000, 0x0204000120000010
 737        .quad 0x0000000100000010, 0x0000040000000000
 738        .quad 0x0204000020000000, 0x0004040100000010
 739        .quad 0x0000040000000000, 0x0004000100000010
 740        .quad 0x0200040120000010, 0x0000000000000000
 741        .quad 0x0204040020000000, 0x0200000020000000
 742        .quad 0x0004000100000010, 0x0200040120000010
 743.L_s7:
 744        .quad 0x0002000000200000, 0x2002000004200002
 745        .quad 0x2000000004000802, 0x0000000000000000
 746        .quad 0x0000000000000800, 0x2000000004000802
 747        .quad 0x2002000000200802, 0x0002000004200800
 748        .quad 0x2002000004200802, 0x0002000000200000
 749        .quad 0x0000000000000000, 0x2000000004000002
 750        .quad 0x2000000000000002, 0x0000000004000000
 751        .quad 0x2002000004200002, 0x2000000000000802
 752        .quad 0x0000000004000800, 0x2002000000200802
 753        .quad 0x2002000000200002, 0x0000000004000800
 754        .quad 0x2000000004000002, 0x0002000004200000
 755        .quad 0x0002000004200800, 0x2002000000200002
 756        .quad 0x0002000004200000, 0x0000000000000800
 757        .quad 0x2000000000000802, 0x2002000004200802
 758        .quad 0x0002000000200800, 0x2000000000000002
 759        .quad 0x0000000004000000, 0x0002000000200800
 760        .quad 0x0000000004000000, 0x0002000000200800
 761        .quad 0x0002000000200000, 0x2000000004000802
 762        .quad 0x2000000004000802, 0x2002000004200002
 763        .quad 0x2002000004200002, 0x2000000000000002
 764        .quad 0x2002000000200002, 0x0000000004000000
 765        .quad 0x0000000004000800, 0x0002000000200000
 766        .quad 0x0002000004200800, 0x2000000000000802
 767        .quad 0x2002000000200802, 0x0002000004200800
 768        .quad 0x2000000000000802, 0x2000000004000002
 769        .quad 0x2002000004200802, 0x0002000004200000
 770        .quad 0x0002000000200800, 0x0000000000000000
 771        .quad 0x2000000000000002, 0x2002000004200802
 772        .quad 0x0000000000000000, 0x2002000000200802
 773        .quad 0x0002000004200000, 0x0000000000000800
 774        .quad 0x2000000004000002, 0x0000000004000800
 775        .quad 0x0000000000000800, 0x2002000000200002
 776.L_s8:
 777        .quad 0x0100010410001000, 0x0000010000001000
 778        .quad 0x0000000000040000, 0x0100010410041000
 779        .quad 0x0100000010000000, 0x0100010410001000
 780        .quad 0x0000000400000000, 0x0100000010000000
 781        .quad 0x0000000400040000, 0x0100000010040000
 782        .quad 0x0100010410041000, 0x0000010000041000
 783        .quad 0x0100010010041000, 0x0000010400041000
 784        .quad 0x0000010000001000, 0x0000000400000000
 785        .quad 0x0100000010040000, 0x0100000410000000
 786        .quad 0x0100010010001000, 0x0000010400001000
 787        .quad 0x0000010000041000, 0x0000000400040000
 788        .quad 0x0100000410040000, 0x0100010010041000
 789        .quad 0x0000010400001000, 0x0000000000000000
 790        .quad 0x0000000000000000, 0x0100000410040000
 791        .quad 0x0100000410000000, 0x0100010010001000
 792        .quad 0x0000010400041000, 0x0000000000040000
 793        .quad 0x0000010400041000, 0x0000000000040000
 794        .quad 0x0100010010041000, 0x0000010000001000
 795        .quad 0x0000000400000000, 0x0100000410040000
 796        .quad 0x0000010000001000, 0x0000010400041000
 797        .quad 0x0100010010001000, 0x0000000400000000
 798        .quad 0x0100000410000000, 0x0100000010040000
 799        .quad 0x0100000410040000, 0x0100000010000000
 800        .quad 0x0000000000040000, 0x0100010410001000
 801        .quad 0x0000000000000000, 0x0100010410041000
 802        .quad 0x0000000400040000, 0x0100000410000000
 803        .quad 0x0100000010040000, 0x0100010010001000
 804        .quad 0x0100010410001000, 0x0000000000000000
 805        .quad 0x0100010410041000, 0x0000010000041000
 806        .quad 0x0000010000041000, 0x0000010400001000
 807        .quad 0x0000010400001000, 0x0000000400040000
 808        .quad 0x0100000010000000, 0x0100010010041000
 809