linux/arch/x86/crypto/des3_ede-asm_64.S
<<
>>
Prefs
   1/* SPDX-License-Identifier: GPL-2.0-or-later */
   2/*
   3 * des3_ede-asm_64.S  -  x86-64 assembly implementation of 3DES cipher
   4 *
   5 * Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
   6 */
   7
   8#include <linux/linkage.h>
   9
  10.file "des3_ede-asm_64.S"
  11.text
  12
  /*
   * Base addresses of the eight lookup tables used by the round macros.
   * Only s1 names a label (.L_s1); s2..s8 are fixed offsets from it, one
   * table of 64 entries * 8 bytes apart each.  The tables in .rodata must
   * therefore stay contiguous and in s1..s8 order.
   */
  13#define s1 .L_s1
  14#define s2 ((s1) + (64*8))
  15#define s3 ((s2) + (64*8))
  16#define s4 ((s3) + (64*8))
  17#define s5 ((s4) + (64*8))
  18#define s6 ((s5) + (64*8))
  19#define s7 ((s6) + (64*8))
  20#define s8 ((s7) + (64*8))
  21
  22/* register macros */
  /*
   * Naming scheme: a trailing digit 0..2 selects the block (only block 0 is
   * used by the 1-way code), a 'd' suffix is the 32-bit view of the register,
   * and 'bl' / 'bh' are the lowest and second-lowest byte views.
   *
   * RT1 (%rsi) and RT3 (%rdx) alias the dst and src argument registers, so
   * the entry points must save dst and finish reading src before using them.
   */
  /* CTX: pointer to the array of 64-bit round keys (first argument) */
  23#define CTX %rdi
  24
  /* RLn: one half of block n (round1 also borrows RL1 as a scratch index) */
  25#define RL0 %r8
  26#define RL1 %r9
  27#define RL2 %r10
  28
  29#define RL0d %r8d
  30#define RL1d %r9d
  31#define RL2d %r10d
  32
  /* RRn: the other half of block n */
  33#define RR0 %r11
  34#define RR1 %r12
  35#define RR2 %r13
  36
  37#define RR0d %r11d
  38#define RR1d %r12d
  39#define RR2d %r13d
  40
  /*
   * RWn: working register of block n; holds the round key, then key ^ half.
   * These are rax/rbx/rcx, whose second byte is addressable (%ah/%bh/%ch).
   */
  41#define RW0 %rax
  42#define RW1 %rbx
  43#define RW2 %rcx
  44
  45#define RW0d %eax
  46#define RW1d %ebx
  47#define RW2d %ecx
  48
  49#define RW0bl %al
  50#define RW1bl %bl
  51#define RW2bl %cl
  52
  53#define RW0bh %ah
  54#define RW1bh %bh
  55#define RW2bh %ch
  56
  /* RTn: scratch registers (table indices, permutation temporaries, mask) */
  57#define RT0 %r15
  58#define RT1 %rsi
  59#define RT2 %r14
  60#define RT3 %rdx
  61
  62#define RT0d %r15d
  63#define RT1d %esi
  64#define RT2d %r14d
  65#define RT3d %edx
  66
  67/***********************************************************************
  68 * 1-way 3DES
  69 ***********************************************************************/
  /*
   * do_permutation(a, b, offset, mask)
   *
   * Exchange the bits of 'b' selected by 'mask' with the bits of 'a' selected
   * by 'mask << offset'.  'a' and 'b' are 32-bit registers:
   *
   *      t = ((a >> offset) ^ b) & mask;
   *      b ^= t;
   *      a ^= t << offset;
   *
   * Clobbers RT0 and the flags.
   */
  70#define do_permutation(a, b, offset, mask) \
  71        movl a, RT0d; \
  72        shrl $(offset), RT0d; \
  73        xorl b, RT0d; \
  74        andl $(mask), RT0d; /* RT0d = bits that differ, at b's positions */ \
  75        xorl RT0d, b; \
  76        shll $(offset), RT0d; \
  77        xorl RT0d, a;
  78
  /*
   * expand_to_64bits(val, mask)
   *
   * Spread the 32-bit value in val##d over the whole 64-bit register:
   *
   *      val = (val32 | ((u64)ror32(val32, 4) << 32)) & mask
   *
   * 'val' is a 64-bit register name (the 'd' suffix is pasted on for the
   * 32-bit view); 'mask' is a 64-bit register.  The orq relies on the upper
   * half of 'val' being zero on entry, which holds when the last write to it
   * was a 32-bit operation.  Clobbers RT0 and the flags.
   */
  79#define expand_to_64bits(val, mask) \
  80        movl val##d, RT0d; \
  81        rorl $4, RT0d; \
  82        shlq $32, RT0; /* rotated copy goes to the upper half */ \
  83        orq RT0, val; \
  84        andq mask, val;
  85
  /*
   * compress_to_64bits(val)
   *
   * Fold the upper half of 'val' back into the lower half:
   *
   *      val32 = low32(val) | rol32(high32(val), 4)
   *
   * For a value produced by expand_to_64bits with the 0x3f3f3f3f3f3f3f3f
   * mask this restores the original 32-bit value.  Despite the name, the
   * result is the 32-bit value in val##d; the 32-bit orl also clears the
   * upper half of 'val'.  Clobbers RT0 and the flags.
   */
  86#define compress_to_64bits(val) \
  87        movq val, RT0; \
  88        shrq $32, RT0; \
  89        roll $4, RT0d; /* undo the rorl $4 done by expand_to_64bits */ \
  90        orl RT0d, val##d;
  91
  /*
   * initial_permutation(left, right)
   *
   * Bring one block, held as two 32-bit halves, into the form the round
   * macros work on:
   *   1. four do_permutation bit-exchange steps between the halves;
   *   2. rotate each half left by one bit, exchanging the bits selected by
   *      0xaaaaaaaa between 'left' and the already rotated 'right';
   *   3. expand both halves to 64 bits with 6 significant bits per byte.
   *
   * 'left' and 'right' are 64-bit register names.  Leaves the byte mask
   * 0x3f3f3f3f3f3f3f3f in RT3; clobbers RT0, RT3, RW0 and the flags.
   */
  92#define initial_permutation(left, right) \
  93        do_permutation(left##d, right##d,  4, 0x0f0f0f0f); \
  94        do_permutation(left##d, right##d, 16, 0x0000ffff); \
  95        do_permutation(right##d, left##d,  2, 0x33333333); \
  96        do_permutation(right##d, left##d,  8, 0x00ff00ff); \
  97        movabs $0x3f3f3f3f3f3f3f3f, RT3; /* keep 6 bits of every byte */ \
  98        movl left##d, RW0d; \
  99        roll $1, right##d; \
 100        xorl right##d, RW0d; \
 101        andl $0xaaaaaaaa, RW0d; \
 102        xorl RW0d, left##d; \
 103        xorl RW0d, right##d; \
 104        roll $1, left##d; \
 105        expand_to_64bits(right, RT3); \
 106        expand_to_64bits(left, RT3);
 107
 /*
  * final_permutation(left, right)
  *
  * Undo initial_permutation on one block:
  *   1. compress both 64-bit halves back to 32 bits;
  *   2. rotate each half right by one bit, with the 0xaaaaaaaa bit exchange
  *      in between;
  *   3. apply the four do_permutation steps in the reverse order.
  *
  * 'left' and 'right' are 64-bit register names.  The result is left in
  * left##d / right##d.  Clobbers RT0, RW0 and the flags.
  */
 108#define final_permutation(left, right) \
 109        compress_to_64bits(right); \
 110        compress_to_64bits(left); \
 111        movl right##d, RW0d; \
 112        rorl $1, left##d; \
 113        xorl left##d, RW0d; \
 114        andl $0xaaaaaaaa, RW0d; \
 115        xorl RW0d, right##d; \
 116        xorl RW0d, left##d; \
 117        rorl $1, right##d; \
 118        do_permutation(right##d, left##d,  8, 0x00ff00ff); \
 119        do_permutation(right##d, left##d,  2, 0x33333333); \
 120        do_permutation(left##d, right##d, 16, 0x0000ffff); \
 121        do_permutation(left##d, right##d,  4, 0x0f0f0f0f);
 122
 /*
  * round1(n, from, to, load_next_key)
  *
  * One round on a single block.
  *   n             - index of the round key currently held in RW0
  *   from          - 64-bit register holding the expanded half fed into the round
  *   to            - 64-bit register the table results are XORed into
  *   load_next_key - macro invoked as load_next_key(n, RW0) once the bytes of
  *                   RW0 have been consumed (dummy2 on the last round)
  *
  * RW0 ^= from, then the eight bytes of RW0, lowest first, index the tables
  * s8, s6, s4, s2, s7, s5, s3, s1.  The eight 64-bit entries are XORed into
  * 'to', partly through the accumulator RT0.
  *
  * On exit RW0 holds whatever load_next_key put there.
  * Clobbers RT0..RT3, RL1 (used as an extra index register) and the flags.
  */
 123#define round1(n, from, to, load_next_key) \
 124        xorq from, RW0; /* RW0 = round key n ^ from */ \
 125        \
 126        movzbl RW0bl, RT0d; /* byte 0 -> s8 index */ \
 127        movzbl RW0bh, RT1d; /* byte 1 -> s6 index */ \
 128        shrq $16, RW0; \
 129        movzbl RW0bl, RT2d; /* byte 2 -> s4 index */ \
 130        movzbl RW0bh, RT3d; /* byte 3 -> s2 index */ \
 131        shrq $16, RW0; \
 132        movq s8(, RT0, 8), RT0; /* RT0 starts the second accumulator */ \
 133        xorq s6(, RT1, 8), to; \
 134        movzbl RW0bl, RL1d; /* byte 4 -> s7 index */ \
 135        movzbl RW0bh, RT1d; /* byte 5 -> s5 index */ \
 136        shrl $16, RW0d; \
 137        xorq s4(, RT2, 8), RT0; \
 138        xorq s2(, RT3, 8), to; \
 139        movzbl RW0bl, RT2d; /* byte 6 -> s3 index */ \
 140        movzbl RW0bh, RT3d; /* byte 7 -> s1 index */ \
 141        xorq s7(, RL1, 8), RT0; \
 142        xorq s5(, RT1, 8), to; \
 143        xorq s3(, RT2, 8), RT0; \
 144        load_next_key(n, RW0); /* RW0 is free now: fetch key n + 1 */ \
 145        xorq RT0, to; /* merge the second accumulator */ \
 146        xorq s1(, RT3, 8), to; \
 147
 /*
  * load_next_key(n, RWx): load the 64-bit round key with index n + 1 from
  * the key array at CTX into RWx.
  */
 148#define load_next_key(n, RWx) \
 149        movq (((n) + 1) * 8)(CTX), RWx;
 150
 /*
  * dummy2(a, b): two-argument macro that expands to nothing.  Passed in place
  * of load_next_key (and of the do_movq argument of round3) on the last
  * round, where no further key has to be loaded.
  */
 151#define dummy2(a, b) /*_*/
 152
 /*
  * read_block(io, left, right): load the two 32-bit words at io and io + 4
  * into left##d / right##d and byte-swap each of them.  The 32-bit loads
  * leave the upper halves of 'left' and 'right' zero.
  */
 153#define read_block(io, left, right) \
 154        movl    (io), left##d; \
 155        movl   4(io), right##d; \
 156        bswapl left##d; \
 157        bswapl right##d;
 158
 /*
  * write_block(io, left, right): byte-swap left##d / right##d in place and
  * store them as the 32-bit words at io and io + 4.
  */
 159#define write_block(io, left, right) \
 160        bswapl left##d; \
 161        bswapl right##d; \
 162        movl   left##d,   (io); \
 163        movl   right##d, 4(io);
 164
 /*
  * des3_ede_x86_64_crypt_blk
  *
  * Process one 8-byte block: 48 rounds in three passes of 16, consuming the
  * 64-bit round keys 0..47 sequentially from CTX.  The code walks the keys
  * in array order in all cases.
  *
  * Registers written: %rax, %rdx, %rsi, %r8, %r9, %r11, %r14, %r15, flags.
  * NOTE(review): of the callee-saved registers pushed below, only %r14 and
  * %r15 are written by the macros used in this function; %rbx, %r12 and
  * %r13 appear to be saved only for symmetry with the 3-way entry point.
  */
 165ENTRY(des3_ede_x86_64_crypt_blk)
 166        /* input:
 167         *      %rdi: round keys, CTX
 168         *      %rsi: dst
 169         *      %rdx: src
 170         */
 171        pushq %rbx;
 172        pushq %r12;
 173        pushq %r13;
 174        pushq %r14;
 175        pushq %r15;
 176
        /* %rsi is also RT1 and is overwritten by the rounds: park dst */
 177        pushq %rsi; /* dst */
 178
        /*
         * src (%rdx) is read here, before initial_permutation overwrites
         * %rdx (RT3) with the byte mask.
         */
 179        read_block(%rdx, RL0, RR0);
 180        initial_permutation(RL0, RR0);
 181
        /* preload round key 0; every round1 loads the key for the next one */
 182        movq (CTX), RW0;
 183
        /* first pass: round keys 0..15 */
 184        round1(0, RR0, RL0, load_next_key);
 185        round1(1, RL0, RR0, load_next_key);
 186        round1(2, RR0, RL0, load_next_key);
 187        round1(3, RL0, RR0, load_next_key);
 188        round1(4, RR0, RL0, load_next_key);
 189        round1(5, RL0, RR0, load_next_key);
 190        round1(6, RR0, RL0, load_next_key);
 191        round1(7, RL0, RR0, load_next_key);
 192        round1(8, RR0, RL0, load_next_key);
 193        round1(9, RL0, RR0, load_next_key);
 194        round1(10, RR0, RL0, load_next_key);
 195        round1(11, RL0, RR0, load_next_key);
 196        round1(12, RR0, RL0, load_next_key);
 197        round1(13, RL0, RR0, load_next_key);
 198        round1(14, RR0, RL0, load_next_key);
 199        round1(15, RL0, RR0, load_next_key);
 200
        /*
         * second pass: round keys 16..31.  It starts with the same from/to
         * pair as round 15, i.e. the two halves trade roles at the pass
         * boundary.
         */
 201        round1(16+0, RL0, RR0, load_next_key);
 202        round1(16+1, RR0, RL0, load_next_key);
 203        round1(16+2, RL0, RR0, load_next_key);
 204        round1(16+3, RR0, RL0, load_next_key);
 205        round1(16+4, RL0, RR0, load_next_key);
 206        round1(16+5, RR0, RL0, load_next_key);
 207        round1(16+6, RL0, RR0, load_next_key);
 208        round1(16+7, RR0, RL0, load_next_key);
 209        round1(16+8, RL0, RR0, load_next_key);
 210        round1(16+9, RR0, RL0, load_next_key);
 211        round1(16+10, RL0, RR0, load_next_key);
 212        round1(16+11, RR0, RL0, load_next_key);
 213        round1(16+12, RL0, RR0, load_next_key);
 214        round1(16+13, RR0, RL0, load_next_key);
 215        round1(16+14, RL0, RR0, load_next_key);
 216        round1(16+15, RR0, RL0, load_next_key);
 217
        /*
         * third pass: round keys 32..47, roles traded again.  The last round
         * passes dummy2 so that no key beyond index 47 is read.
         */
 218        round1(32+0, RR0, RL0, load_next_key);
 219        round1(32+1, RL0, RR0, load_next_key);
 220        round1(32+2, RR0, RL0, load_next_key);
 221        round1(32+3, RL0, RR0, load_next_key);
 222        round1(32+4, RR0, RL0, load_next_key);
 223        round1(32+5, RL0, RR0, load_next_key);
 224        round1(32+6, RR0, RL0, load_next_key);
 225        round1(32+7, RL0, RR0, load_next_key);
 226        round1(32+8, RR0, RL0, load_next_key);
 227        round1(32+9, RL0, RR0, load_next_key);
 228        round1(32+10, RR0, RL0, load_next_key);
 229        round1(32+11, RL0, RR0, load_next_key);
 230        round1(32+12, RR0, RL0, load_next_key);
 231        round1(32+13, RL0, RR0, load_next_key);
 232        round1(32+14, RR0, RL0, load_next_key);
 233        round1(32+15, RL0, RR0, dummy2);
 234
        /* the halves are handed over swapped: RR0 as 'left', RL0 as 'right' */
 235        final_permutation(RR0, RL0);
 236
 237        popq %rsi /* dst */
 238        write_block(%rsi, RR0, RL0);
 239
        /* restore the callee-saved registers in reverse push order */
 240        popq %r15;
 241        popq %r14;
 242        popq %r13;
 243        popq %r12;
 244        popq %rbx;
 245
 246        ret;
 247ENDPROC(des3_ede_x86_64_crypt_blk)
 248
 249/***********************************************************************
 250 * 3-way 3DES
 251 ***********************************************************************/
 /*
  * expand_to_64bits(val, mask): token-for-token the same definition as the
  * one in the 1-way section, repeated here for the 3-way code.
  *
  *      val = (val32 | ((u64)ror32(val32, 4) << 32)) & mask
  *
  * Requires the upper half of 'val' to be zero on entry.  Clobbers RT0 and
  * the flags.
  */
 252#define expand_to_64bits(val, mask) \
 253        movl val##d, RT0d; \
 254        rorl $4, RT0d; \
 255        shlq $32, RT0; \
 256        orq RT0, val; \
 257        andq mask, val;
 258
 /*
  * compress_to_64bits(val): token-for-token the same definition as the one
  * in the 1-way section, repeated here for the 3-way code.
  *
  *      val32 = low32(val) | rol32(high32(val), 4)
  *
  * The result is the 32-bit value in val##d (upper half of 'val' cleared).
  * Clobbers RT0 and the flags.
  */
 259#define compress_to_64bits(val) \
 260        movq val, RT0; \
 261        shrq $32, RT0; \
 262        roll $4, RT0d; \
 263        orl RT0d, val##d;
 264
 /*
  * initial_permutation3(left, right)
  *
  * Three-block version of initial_permutation.  'left' and 'right' are
  * register-name prefixes (RL / RR); the block number 0..2 and the 'd' suffix
  * are pasted on.  The same steps are applied to each block, grouped stage by
  * stage; the indentation depth marks the block being worked on.
  *
  * Leaves the byte mask 0x3f3f3f3f3f3f3f3f in RT3.
  * Clobbers RT0, RT3, RW0, RW1, RW2 and the flags.
  */
 265#define initial_permutation3(left, right) \
 266        do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
 267        do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
 268          do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
 269          do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
 270            do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f); \
 271            do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
 272            \
 273        do_permutation(right##0d, left##0d,  2, 0x33333333); \
 274        do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
 275          do_permutation(right##1d, left##1d,  2, 0x33333333); \
 276          do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
 277            do_permutation(right##2d, left##2d,  2, 0x33333333); \
 278            do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
 279            \
 280        movabs $0x3f3f3f3f3f3f3f3f, RT3; /* keep 6 bits of every byte */ \
 281            \
 282        movl left##0d, RW0d; /* block 0: rotate + 0xaaaaaaaa exchange */ \
 283        roll $1, right##0d; \
 284        xorl right##0d, RW0d; \
 285        andl $0xaaaaaaaa, RW0d; \
 286        xorl RW0d, left##0d; \
 287        xorl RW0d, right##0d; \
 288        roll $1, left##0d; \
 289        expand_to_64bits(right##0, RT3); \
 290        expand_to_64bits(left##0, RT3); \
 291          movl left##1d, RW1d; /* block 1 */ \
 292          roll $1, right##1d; \
 293          xorl right##1d, RW1d; \
 294          andl $0xaaaaaaaa, RW1d; \
 295          xorl RW1d, left##1d; \
 296          xorl RW1d, right##1d; \
 297          roll $1, left##1d; \
 298          expand_to_64bits(right##1, RT3); \
 299          expand_to_64bits(left##1, RT3); \
 300            movl left##2d, RW2d; /* block 2 */ \
 301            roll $1, right##2d; \
 302            xorl right##2d, RW2d; \
 303            andl $0xaaaaaaaa, RW2d; \
 304            xorl RW2d, left##2d; \
 305            xorl RW2d, right##2d; \
 306            roll $1, left##2d; \
 307            expand_to_64bits(right##2, RT3); \
 308            expand_to_64bits(left##2, RT3);
 309
 /*
  * final_permutation3(left, right)
  *
  * Three-block version of final_permutation.  'left' and 'right' are
  * register-name prefixes (RL / RR); the block number 0..2 and the 'd' suffix
  * are pasted on.  Each block is compressed back to 32-bit halves, rotated
  * right by one bit with the 0xaaaaaaaa exchange, and then run through the
  * four do_permutation steps.  The indentation depth marks the block.
  *
  * Results are left in the 32-bit views of the six block registers.
  * Clobbers RT0, RW0, RW1, RW2 and the flags.
  */
 310#define final_permutation3(left, right) \
 311        compress_to_64bits(right##0); /* block 0 */ \
 312        compress_to_64bits(left##0); \
 313        movl right##0d, RW0d; \
 314        rorl $1, left##0d; \
 315        xorl left##0d, RW0d; \
 316        andl $0xaaaaaaaa, RW0d; \
 317        xorl RW0d, right##0d; \
 318        xorl RW0d, left##0d; \
 319        rorl $1, right##0d; \
 320          compress_to_64bits(right##1); /* block 1 */ \
 321          compress_to_64bits(left##1); \
 322          movl right##1d, RW1d; \
 323          rorl $1, left##1d; \
 324          xorl left##1d, RW1d; \
 325          andl $0xaaaaaaaa, RW1d; \
 326          xorl RW1d, right##1d; \
 327          xorl RW1d, left##1d; \
 328          rorl $1, right##1d; \
 329            compress_to_64bits(right##2); /* block 2 */ \
 330            compress_to_64bits(left##2); \
 331            movl right##2d, RW2d; \
 332            rorl $1, left##2d; \
 333            xorl left##2d, RW2d; \
 334            andl $0xaaaaaaaa, RW2d; \
 335            xorl RW2d, right##2d; \
 336            xorl RW2d, left##2d; \
 337            rorl $1, right##2d; \
 338            \
 339        do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
 340        do_permutation(right##0d, left##0d,  2, 0x33333333); \
 341          do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
 342          do_permutation(right##1d, left##1d,  2, 0x33333333); \
 343            do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
 344            do_permutation(right##2d, left##2d,  2, 0x33333333); \
 345            \
 346        do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
 347        do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
 348          do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
 349          do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
 350            do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
 351            do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f);
 352
 /*
  * round3(n, from, to, load_next_key, do_movq)
  *
  * One round on three blocks, processed one after the other.
  *   n             - index of the round key held in RW0, RW1 and RW2 on entry
  *   from, to      - register-name prefixes (RL / RR); the block number 0..2
  *                   is pasted on
  *   load_next_key - invoked as load_next_key(n, RW0) once block 0 has
  *                   consumed RW0
  *   do_movq       - invoked as do_movq(RW0, RW1) and do_movq(RW0, RW2) to
  *                   copy the freshly loaded key to the other two working
  *                   registers (dummy2 on the last round)
  *
  * Per block: RWx ^= from##x, then the eight bytes of RWx, lowest first,
  * index the tables s8, s6, s4, s2, s7, s5, s3, s1.  Each 64-bit entry is
  * XORed straight into to##x.
  *
  * Clobbers RT1, RT3 and the flags.
  */
 353#define round3(n, from, to, load_next_key, do_movq) \
 354        xorq from##0, RW0; /* block 0 */ \
 355        movzbl RW0bl, RT3d; \
 356        movzbl RW0bh, RT1d; \
 357        shrq $16, RW0; \
 358        xorq s8(, RT3, 8), to##0; \
 359        xorq s6(, RT1, 8), to##0; \
 360        movzbl RW0bl, RT3d; \
 361        movzbl RW0bh, RT1d; \
 362        shrq $16, RW0; \
 363        xorq s4(, RT3, 8), to##0; \
 364        xorq s2(, RT1, 8), to##0; \
 365        movzbl RW0bl, RT3d; \
 366        movzbl RW0bh, RT1d; \
 367        shrl $16, RW0d; \
 368        xorq s7(, RT3, 8), to##0; \
 369        xorq s5(, RT1, 8), to##0; \
 370        movzbl RW0bl, RT3d; \
 371        movzbl RW0bh, RT1d; \
 372        load_next_key(n, RW0); /* RW0 is free now: fetch key n + 1 */ \
 373        xorq s3(, RT3, 8), to##0; \
 374        xorq s1(, RT1, 8), to##0; \
 375                xorq from##1, RW1; /* block 1 */ \
 376                movzbl RW1bl, RT3d; \
 377                movzbl RW1bh, RT1d; \
 378                shrq $16, RW1; \
 379                xorq s8(, RT3, 8), to##1; \
 380                xorq s6(, RT1, 8), to##1; \
 381                movzbl RW1bl, RT3d; \
 382                movzbl RW1bh, RT1d; \
 383                shrq $16, RW1; \
 384                xorq s4(, RT3, 8), to##1; \
 385                xorq s2(, RT1, 8), to##1; \
 386                movzbl RW1bl, RT3d; \
 387                movzbl RW1bh, RT1d; \
 388                shrl $16, RW1d; \
 389                xorq s7(, RT3, 8), to##1; \
 390                xorq s5(, RT1, 8), to##1; \
 391                movzbl RW1bl, RT3d; \
 392                movzbl RW1bh, RT1d; \
 393                do_movq(RW0, RW1); /* RW1 = key n + 1 */ \
 394                xorq s3(, RT3, 8), to##1; \
 395                xorq s1(, RT1, 8), to##1; \
 396                        xorq from##2, RW2; /* block 2 */ \
 397                        movzbl RW2bl, RT3d; \
 398                        movzbl RW2bh, RT1d; \
 399                        shrq $16, RW2; \
 400                        xorq s8(, RT3, 8), to##2; \
 401                        xorq s6(, RT1, 8), to##2; \
 402                        movzbl RW2bl, RT3d; \
 403                        movzbl RW2bh, RT1d; \
 404                        shrq $16, RW2; \
 405                        xorq s4(, RT3, 8), to##2; \
 406                        xorq s2(, RT1, 8), to##2; \
 407                        movzbl RW2bl, RT3d; \
 408                        movzbl RW2bh, RT1d; \
 409                        shrl $16, RW2d; \
 410                        xorq s7(, RT3, 8), to##2; \
 411                        xorq s5(, RT1, 8), to##2; \
 412                        movzbl RW2bl, RT3d; \
 413                        movzbl RW2bh, RT1d; \
 414                        do_movq(RW0, RW2); /* RW2 = key n + 1 */ \
 415                        xorq s3(, RT3, 8), to##2; \
 416                        xorq s1(, RT1, 8), to##2;
 417
 /*
  * __movq(src, dst): plain 64-bit register move.  Wrapped in a macro so it
  * can be passed as the do_movq argument of round3 and replaced by dummy2 on
  * the last round.
  */
 418#define __movq(src, dst) \
 419        movq src, dst;
 420
 /*
  * des3_ede_x86_64_crypt_blk_3way
  *
  * Process three consecutive 8-byte blocks (24 bytes read from src, 24 bytes
  * written to dst) with the same 48 round keys.  The three blocks go through
  * every round together via round3.
  *
  * Registers written: %rax, %rbx, %rcx, %rdx, %rsi, %r8..%r13, %r15, flags.
  * The callee-saved ones among them are saved and restored here.
  */
 421ENTRY(des3_ede_x86_64_crypt_blk_3way)
 422        /* input:
 423         *      %rdi: ctx, round keys
 424         *      %rsi: dst (3 blocks)
 425         *      %rdx: src (3 blocks)
 426         */
 427
 428        pushq %rbx;
 429        pushq %r12;
 430        pushq %r13;
 431        pushq %r14;
 432        pushq %r15;
 433
        /* %rsi is also RT1 and is overwritten by the rounds: park dst */
 434        pushq %rsi /* dst */
 435
 436        /* load input */
        /*
         * All of src is read before initial_permutation3 overwrites %rdx
         * (RT3).  Block n supplies RLn (first word) and RRn (second word).
         */
 437        movl 0 * 4(%rdx), RL0d;
 438        movl 1 * 4(%rdx), RR0d;
 439        movl 2 * 4(%rdx), RL1d;
 440        movl 3 * 4(%rdx), RR1d;
 441        movl 4 * 4(%rdx), RL2d;
 442        movl 5 * 4(%rdx), RR2d;
 443
 444        bswapl RL0d;
 445        bswapl RR0d;
 446        bswapl RL1d;
 447        bswapl RR1d;
 448        bswapl RL2d;
 449        bswapl RR2d;
 450
 451        initial_permutation3(RL, RR);
 452
        /* seed all three working registers with round key 0 */
 453        movq 0(CTX), RW0;
 454        movq RW0, RW1;
 455        movq RW0, RW2;
 456
        /* first pass: round keys 0..15 */
 457        round3(0, RR, RL, load_next_key, __movq);
 458        round3(1, RL, RR, load_next_key, __movq);
 459        round3(2, RR, RL, load_next_key, __movq);
 460        round3(3, RL, RR, load_next_key, __movq);
 461        round3(4, RR, RL, load_next_key, __movq);
 462        round3(5, RL, RR, load_next_key, __movq);
 463        round3(6, RR, RL, load_next_key, __movq);
 464        round3(7, RL, RR, load_next_key, __movq);
 465        round3(8, RR, RL, load_next_key, __movq);
 466        round3(9, RL, RR, load_next_key, __movq);
 467        round3(10, RR, RL, load_next_key, __movq);
 468        round3(11, RL, RR, load_next_key, __movq);
 469        round3(12, RR, RL, load_next_key, __movq);
 470        round3(13, RL, RR, load_next_key, __movq);
 471        round3(14, RR, RL, load_next_key, __movq);
 472        round3(15, RL, RR, load_next_key, __movq);
 473
        /*
         * second pass: round keys 16..31.  It starts with the same from/to
         * pair as round 15, i.e. the two halves trade roles at the pass
         * boundary.
         */
 474        round3(16+0, RL, RR, load_next_key, __movq);
 475        round3(16+1, RR, RL, load_next_key, __movq);
 476        round3(16+2, RL, RR, load_next_key, __movq);
 477        round3(16+3, RR, RL, load_next_key, __movq);
 478        round3(16+4, RL, RR, load_next_key, __movq);
 479        round3(16+5, RR, RL, load_next_key, __movq);
 480        round3(16+6, RL, RR, load_next_key, __movq);
 481        round3(16+7, RR, RL, load_next_key, __movq);
 482        round3(16+8, RL, RR, load_next_key, __movq);
 483        round3(16+9, RR, RL, load_next_key, __movq);
 484        round3(16+10, RL, RR, load_next_key, __movq);
 485        round3(16+11, RR, RL, load_next_key, __movq);
 486        round3(16+12, RL, RR, load_next_key, __movq);
 487        round3(16+13, RR, RL, load_next_key, __movq);
 488        round3(16+14, RL, RR, load_next_key, __movq);
 489        round3(16+15, RR, RL, load_next_key, __movq);
 490
        /*
         * third pass: round keys 32..47, roles traded again.  The last round
         * passes dummy2 twice: no key beyond index 47 is read or copied.
         */
 491        round3(32+0, RR, RL, load_next_key, __movq);
 492        round3(32+1, RL, RR, load_next_key, __movq);
 493        round3(32+2, RR, RL, load_next_key, __movq);
 494        round3(32+3, RL, RR, load_next_key, __movq);
 495        round3(32+4, RR, RL, load_next_key, __movq);
 496        round3(32+5, RL, RR, load_next_key, __movq);
 497        round3(32+6, RR, RL, load_next_key, __movq);
 498        round3(32+7, RL, RR, load_next_key, __movq);
 499        round3(32+8, RR, RL, load_next_key, __movq);
 500        round3(32+9, RL, RR, load_next_key, __movq);
 501        round3(32+10, RR, RL, load_next_key, __movq);
 502        round3(32+11, RL, RR, load_next_key, __movq);
 503        round3(32+12, RR, RL, load_next_key, __movq);
 504        round3(32+13, RL, RR, load_next_key, __movq);
 505        round3(32+14, RR, RL, load_next_key, __movq);
 506        round3(32+15, RL, RR, dummy2, dummy2);
 507
        /* the halves are handed over swapped: RR as 'left', RL as 'right' */
 508        final_permutation3(RR, RL);
 509
 510        bswapl RR0d;
 511        bswapl RL0d;
 512        bswapl RR1d;
 513        bswapl RL1d;
 514        bswapl RR2d;
 515        bswapl RL2d;
 516
        /* store each block with the RR half first, then the RL half */
 517        popq %rsi /* dst */
 518        movl RR0d, 0 * 4(%rsi);
 519        movl RL0d, 1 * 4(%rsi);
 520        movl RR1d, 2 * 4(%rsi);
 521        movl RL1d, 3 * 4(%rsi);
 522        movl RR2d, 4 * 4(%rsi);
 523        movl RL2d, 5 * 4(%rsi);
 524
        /* restore the callee-saved registers in reverse push order */
 525        popq %r15;
 526        popq %r14;
 527        popq %r13;
 528        popq %r12;
 529        popq %rbx;
 530
 531        ret;
 532ENDPROC(des3_ede_x86_64_crypt_blk_3way)
 533
 /*
  * Lookup tables for the round macros: eight tables of 64 quadwords each,
  * laid out back to back.  The round macros index them with a 6-bit value
  * scaled by 8 and XOR the 64-bit entry into the half-block register.
  *
  * Only .L_s1 is used as a base address; the other tables are reached as
  * .L_s1 + k * 64 * 8 through the s2..s8 macros.  Do not reorder the
  * tables or insert padding or other data between them.
  */
 534.section        .rodata, "a", @progbits
 535.align 16
 536.L_s1:
 537        .quad 0x0010100001010400, 0x0000000000000000
 538        .quad 0x0000100000010000, 0x0010100001010404
 539        .quad 0x0010100001010004, 0x0000100000010404
 540        .quad 0x0000000000000004, 0x0000100000010000
 541        .quad 0x0000000000000400, 0x0010100001010400
 542        .quad 0x0010100001010404, 0x0000000000000400
 543        .quad 0x0010000001000404, 0x0010100001010004
 544        .quad 0x0010000001000000, 0x0000000000000004
 545        .quad 0x0000000000000404, 0x0010000001000400
 546        .quad 0x0010000001000400, 0x0000100000010400
 547        .quad 0x0000100000010400, 0x0010100001010000
 548        .quad 0x0010100001010000, 0x0010000001000404
 549        .quad 0x0000100000010004, 0x0010000001000004
 550        .quad 0x0010000001000004, 0x0000100000010004
 551        .quad 0x0000000000000000, 0x0000000000000404
 552        .quad 0x0000100000010404, 0x0010000001000000
 553        .quad 0x0000100000010000, 0x0010100001010404
 554        .quad 0x0000000000000004, 0x0010100001010000
 555        .quad 0x0010100001010400, 0x0010000001000000
 556        .quad 0x0010000001000000, 0x0000000000000400
 557        .quad 0x0010100001010004, 0x0000100000010000
 558        .quad 0x0000100000010400, 0x0010000001000004
 559        .quad 0x0000000000000400, 0x0000000000000004
 560        .quad 0x0010000001000404, 0x0000100000010404
 561        .quad 0x0010100001010404, 0x0000100000010004
 562        .quad 0x0010100001010000, 0x0010000001000404
 563        .quad 0x0010000001000004, 0x0000000000000404
 564        .quad 0x0000100000010404, 0x0010100001010400
 565        .quad 0x0000000000000404, 0x0010000001000400
 566        .quad 0x0010000001000400, 0x0000000000000000
 567        .quad 0x0000100000010004, 0x0000100000010400
 568        .quad 0x0000000000000000, 0x0010100001010004
 /* table 2 of 8 (64 quadwords); the round macros address it as s2 = s1 + 1 * 64 * 8 */
 569.L_s2:
 570        .quad 0x0801080200100020, 0x0800080000000000
 571        .quad 0x0000080000000000, 0x0001080200100020
 572        .quad 0x0001000000100000, 0x0000000200000020
 573        .quad 0x0801000200100020, 0x0800080200000020
 574        .quad 0x0800000200000020, 0x0801080200100020
 575        .quad 0x0801080000100000, 0x0800000000000000
 576        .quad 0x0800080000000000, 0x0001000000100000
 577        .quad 0x0000000200000020, 0x0801000200100020
 578        .quad 0x0001080000100000, 0x0001000200100020
 579        .quad 0x0800080200000020, 0x0000000000000000
 580        .quad 0x0800000000000000, 0x0000080000000000
 581        .quad 0x0001080200100020, 0x0801000000100000
 582        .quad 0x0001000200100020, 0x0800000200000020
 583        .quad 0x0000000000000000, 0x0001080000100000
 584        .quad 0x0000080200000020, 0x0801080000100000
 585        .quad 0x0801000000100000, 0x0000080200000020
 586        .quad 0x0000000000000000, 0x0001080200100020
 587        .quad 0x0801000200100020, 0x0001000000100000
 588        .quad 0x0800080200000020, 0x0801000000100000
 589        .quad 0x0801080000100000, 0x0000080000000000
 590        .quad 0x0801000000100000, 0x0800080000000000
 591        .quad 0x0000000200000020, 0x0801080200100020
 592        .quad 0x0001080200100020, 0x0000000200000020
 593        .quad 0x0000080000000000, 0x0800000000000000
 594        .quad 0x0000080200000020, 0x0801080000100000
 595        .quad 0x0001000000100000, 0x0800000200000020
 596        .quad 0x0001000200100020, 0x0800080200000020
 597        .quad 0x0800000200000020, 0x0001000200100020
 598        .quad 0x0001080000100000, 0x0000000000000000
 599        .quad 0x0800080000000000, 0x0000080200000020
 600        .quad 0x0800000000000000, 0x0801000200100020
 601        .quad 0x0801080200100020, 0x0001080000100000
 /* table 3 of 8 (64 quadwords); the round macros address it as s3 = s1 + 2 * 64 * 8 */
 602.L_s3:
 603        .quad 0x0000002000000208, 0x0000202008020200
 604        .quad 0x0000000000000000, 0x0000200008020008
 605        .quad 0x0000002008000200, 0x0000000000000000
 606        .quad 0x0000202000020208, 0x0000002008000200
 607        .quad 0x0000200000020008, 0x0000000008000008
 608        .quad 0x0000000008000008, 0x0000200000020000
 609        .quad 0x0000202008020208, 0x0000200000020008
 610        .quad 0x0000200008020000, 0x0000002000000208
 611        .quad 0x0000000008000000, 0x0000000000000008
 612        .quad 0x0000202008020200, 0x0000002000000200
 613        .quad 0x0000202000020200, 0x0000200008020000
 614        .quad 0x0000200008020008, 0x0000202000020208
 615        .quad 0x0000002008000208, 0x0000202000020200
 616        .quad 0x0000200000020000, 0x0000002008000208
 617        .quad 0x0000000000000008, 0x0000202008020208
 618        .quad 0x0000002000000200, 0x0000000008000000
 619        .quad 0x0000202008020200, 0x0000000008000000
 620        .quad 0x0000200000020008, 0x0000002000000208
 621        .quad 0x0000200000020000, 0x0000202008020200
 622        .quad 0x0000002008000200, 0x0000000000000000
 623        .quad 0x0000002000000200, 0x0000200000020008
 624        .quad 0x0000202008020208, 0x0000002008000200
 625        .quad 0x0000000008000008, 0x0000002000000200
 626        .quad 0x0000000000000000, 0x0000200008020008
 627        .quad 0x0000002008000208, 0x0000200000020000
 628        .quad 0x0000000008000000, 0x0000202008020208
 629        .quad 0x0000000000000008, 0x0000202000020208
 630        .quad 0x0000202000020200, 0x0000000008000008
 631        .quad 0x0000200008020000, 0x0000002008000208
 632        .quad 0x0000002000000208, 0x0000200008020000
 633        .quad 0x0000202000020208, 0x0000000000000008
 634        .quad 0x0000200008020008, 0x0000202000020200
 /* table 4 of 8 (64 quadwords); the round macros address it as s4 = s1 + 3 * 64 * 8 */
 635.L_s4:
 636        .quad 0x1008020000002001, 0x1000020800002001
 637        .quad 0x1000020800002001, 0x0000000800000000
 638        .quad 0x0008020800002000, 0x1008000800000001
 639        .quad 0x1008000000000001, 0x1000020000002001
 640        .quad 0x0000000000000000, 0x0008020000002000
 641        .quad 0x0008020000002000, 0x1008020800002001
 642        .quad 0x1000000800000001, 0x0000000000000000
 643        .quad 0x0008000800000000, 0x1008000000000001
 644        .quad 0x1000000000000001, 0x0000020000002000
 645        .quad 0x0008000000000000, 0x1008020000002001
 646        .quad 0x0000000800000000, 0x0008000000000000
 647        .quad 0x1000020000002001, 0x0000020800002000
 648        .quad 0x1008000800000001, 0x1000000000000001
 649        .quad 0x0000020800002000, 0x0008000800000000
 650        .quad 0x0000020000002000, 0x0008020800002000
 651        .quad 0x1008020800002001, 0x1000000800000001
 652        .quad 0x0008000800000000, 0x1008000000000001
 653        .quad 0x0008020000002000, 0x1008020800002001
 654        .quad 0x1000000800000001, 0x0000000000000000
 655        .quad 0x0000000000000000, 0x0008020000002000
 656        .quad 0x0000020800002000, 0x0008000800000000
 657        .quad 0x1008000800000001, 0x1000000000000001
 658        .quad 0x1008020000002001, 0x1000020800002001
 659        .quad 0x1000020800002001, 0x0000000800000000
 660        .quad 0x1008020800002001, 0x1000000800000001
 661        .quad 0x1000000000000001, 0x0000020000002000
 662        .quad 0x1008000000000001, 0x1000020000002001
 663        .quad 0x0008020800002000, 0x1008000800000001
 664        .quad 0x1000020000002001, 0x0000020800002000
 665        .quad 0x0008000000000000, 0x1008020000002001
 666        .quad 0x0000000800000000, 0x0008000000000000
 667        .quad 0x0000020000002000, 0x0008020800002000
 /* table 5 of 8 (64 quadwords); the round macros address it as s5 = s1 + 4 * 64 * 8 */
 668.L_s5:
 669        .quad 0x0000001000000100, 0x0020001002080100
 670        .quad 0x0020000002080000, 0x0420001002000100
 671        .quad 0x0000000000080000, 0x0000001000000100
 672        .quad 0x0400000000000000, 0x0020000002080000
 673        .quad 0x0400001000080100, 0x0000000000080000
 674        .quad 0x0020001002000100, 0x0400001000080100
 675        .quad 0x0420001002000100, 0x0420000002080000
 676        .quad 0x0000001000080100, 0x0400000000000000
 677        .quad 0x0020000002000000, 0x0400000000080000
 678        .quad 0x0400000000080000, 0x0000000000000000
 679        .quad 0x0400001000000100, 0x0420001002080100
 680        .quad 0x0420001002080100, 0x0020001002000100
 681        .quad 0x0420000002080000, 0x0400001000000100
 682        .quad 0x0000000000000000, 0x0420000002000000
 683        .quad 0x0020001002080100, 0x0020000002000000
 684        .quad 0x0420000002000000, 0x0000001000080100
 685        .quad 0x0000000000080000, 0x0420001002000100
 686        .quad 0x0000001000000100, 0x0020000002000000
 687        .quad 0x0400000000000000, 0x0020000002080000
 688        .quad 0x0420001002000100, 0x0400001000080100
 689        .quad 0x0020001002000100, 0x0400000000000000
 690        .quad 0x0420000002080000, 0x0020001002080100
 691        .quad 0x0400001000080100, 0x0000001000000100
 692        .quad 0x0020000002000000, 0x0420000002080000
 693        .quad 0x0420001002080100, 0x0000001000080100
 694        .quad 0x0420000002000000, 0x0420001002080100
 695        .quad 0x0020000002080000, 0x0000000000000000
 696        .quad 0x0400000000080000, 0x0420000002000000
 697        .quad 0x0000001000080100, 0x0020001002000100
 698        .quad 0x0400001000000100, 0x0000000000080000
 699        .quad 0x0000000000000000, 0x0400000000080000
 700        .quad 0x0020001002080100, 0x0400001000000100
 /* table 6 of 8 (64 quadwords); the round macros address it as s6 = s1 + 5 * 64 * 8 */
 701.L_s6:
 702        .quad 0x0200000120000010, 0x0204000020000000
 703        .quad 0x0000040000000000, 0x0204040120000010
 704        .quad 0x0204000020000000, 0x0000000100000010
 705        .quad 0x0204040120000010, 0x0004000000000000
 706        .quad 0x0200040020000000, 0x0004040100000010
 707        .quad 0x0004000000000000, 0x0200000120000010
 708        .quad 0x0004000100000010, 0x0200040020000000
 709        .quad 0x0200000020000000, 0x0000040100000010
 710        .quad 0x0000000000000000, 0x0004000100000010
 711        .quad 0x0200040120000010, 0x0000040000000000
 712        .quad 0x0004040000000000, 0x0200040120000010
 713        .quad 0x0000000100000010, 0x0204000120000010
 714        .quad 0x0204000120000010, 0x0000000000000000
 715        .quad 0x0004040100000010, 0x0204040020000000
 716        .quad 0x0000040100000010, 0x0004040000000000
 717        .quad 0x0204040020000000, 0x0200000020000000
 718        .quad 0x0200040020000000, 0x0000000100000010
 719        .quad 0x0204000120000010, 0x0004040000000000
 720        .quad 0x0204040120000010, 0x0004000000000000
 721        .quad 0x0000040100000010, 0x0200000120000010
 722        .quad 0x0004000000000000, 0x0200040020000000
 723        .quad 0x0200000020000000, 0x0000040100000010
 724        .quad 0x0200000120000010, 0x0204040120000010
 725        .quad 0x0004040000000000, 0x0204000020000000
 726        .quad 0x0004040100000010, 0x0204040020000000
 727        .quad 0x0000000000000000, 0x0204000120000010
 728        .quad 0x0000000100000010, 0x0000040000000000
 729        .quad 0x0204000020000000, 0x0004040100000010
 730        .quad 0x0000040000000000, 0x0004000100000010
 731        .quad 0x0200040120000010, 0x0000000000000000
 732        .quad 0x0204040020000000, 0x0200000020000000
 733        .quad 0x0004000100000010, 0x0200040120000010
 /* table 7 of 8 (64 quadwords); the round macros address it as s7 = s1 + 6 * 64 * 8 */
 734.L_s7:
 735        .quad 0x0002000000200000, 0x2002000004200002
 736        .quad 0x2000000004000802, 0x0000000000000000
 737        .quad 0x0000000000000800, 0x2000000004000802
 738        .quad 0x2002000000200802, 0x0002000004200800
 739        .quad 0x2002000004200802, 0x0002000000200000
 740        .quad 0x0000000000000000, 0x2000000004000002
 741        .quad 0x2000000000000002, 0x0000000004000000
 742        .quad 0x2002000004200002, 0x2000000000000802
 743        .quad 0x0000000004000800, 0x2002000000200802
 744        .quad 0x2002000000200002, 0x0000000004000800
 745        .quad 0x2000000004000002, 0x0002000004200000
 746        .quad 0x0002000004200800, 0x2002000000200002
 747        .quad 0x0002000004200000, 0x0000000000000800
 748        .quad 0x2000000000000802, 0x2002000004200802
 749        .quad 0x0002000000200800, 0x2000000000000002
 750        .quad 0x0000000004000000, 0x0002000000200800
 751        .quad 0x0000000004000000, 0x0002000000200800
 752        .quad 0x0002000000200000, 0x2000000004000802
 753        .quad 0x2000000004000802, 0x2002000004200002
 754        .quad 0x2002000004200002, 0x2000000000000002
 755        .quad 0x2002000000200002, 0x0000000004000000
 756        .quad 0x0000000004000800, 0x0002000000200000
 757        .quad 0x0002000004200800, 0x2000000000000802
 758        .quad 0x2002000000200802, 0x0002000004200800
 759        .quad 0x2000000000000802, 0x2000000004000002
 760        .quad 0x2002000004200802, 0x0002000004200000
 761        .quad 0x0002000000200800, 0x0000000000000000
 762        .quad 0x2000000000000002, 0x2002000004200802
 763        .quad 0x0000000000000000, 0x2002000000200802
 764        .quad 0x0002000004200000, 0x0000000000000800
 765        .quad 0x2000000004000002, 0x0000000004000800
 766        .quad 0x0000000000000800, 0x2002000000200002
 /* table 8 of 8 (64 quadwords); the round macros address it as s8 = s1 + 7 * 64 * 8 */
 767.L_s8:
 768        .quad 0x0100010410001000, 0x0000010000001000
 769        .quad 0x0000000000040000, 0x0100010410041000
 770        .quad 0x0100000010000000, 0x0100010410001000
 771        .quad 0x0000000400000000, 0x0100000010000000
 772        .quad 0x0000000400040000, 0x0100000010040000
 773        .quad 0x0100010410041000, 0x0000010000041000
 774        .quad 0x0100010010041000, 0x0000010400041000
 775        .quad 0x0000010000001000, 0x0000000400000000
 776        .quad 0x0100000010040000, 0x0100000410000000
 777        .quad 0x0100010010001000, 0x0000010400001000
 778        .quad 0x0000010000041000, 0x0000000400040000
 779        .quad 0x0100000410040000, 0x0100010010041000
 780        .quad 0x0000010400001000, 0x0000000000000000
 781        .quad 0x0000000000000000, 0x0100000410040000
 782        .quad 0x0100000410000000, 0x0100010010001000
 783        .quad 0x0000010400041000, 0x0000000000040000
 784        .quad 0x0000010400041000, 0x0000000000040000
 785        .quad 0x0100010010041000, 0x0000010000001000
 786        .quad 0x0000000400000000, 0x0100000410040000
 787        .quad 0x0000010000001000, 0x0000010400041000
 788        .quad 0x0100010010001000, 0x0000000400000000
 789        .quad 0x0100000410000000, 0x0100000010040000
 790        .quad 0x0100000410040000, 0x0100000010000000
 791        .quad 0x0000000000040000, 0x0100010410001000
 792        .quad 0x0000000000000000, 0x0100010410041000
 793        .quad 0x0000000400040000, 0x0100000410000000
 794        .quad 0x0100000010040000, 0x0100010010001000
 795        .quad 0x0100010410001000, 0x0000000000000000
 796        .quad 0x0100010410041000, 0x0000010000041000
 797        .quad 0x0000010000041000, 0x0000010400001000
 798        .quad 0x0000010400001000, 0x0000000400040000
 799        .quad 0x0100000010000000, 0x0100010010041000
 800