linux/arch/tile/kernel/unaligned.c
   1/*
   2 * Copyright 2013 Tilera Corporation. All Rights Reserved.
   3 *
   4 *   This program is free software; you can redistribute it and/or
   5 *   modify it under the terms of the GNU General Public License
   6 *   as published by the Free Software Foundation, version 2.
   7 *
   8 *   This program is distributed in the hope that it will be useful, but
   9 *   WITHOUT ANY WARRANTY; without even the implied warranty of
  10 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  11 *   NON INFRINGEMENT.  See the GNU General Public License for
  12 *   more details.
  13 *
   14 * A code-rewriter that handles unaligned exceptions.
  15 */
  16
  17#include <linux/smp.h>
  18#include <linux/ptrace.h>
  19#include <linux/slab.h>
  20#include <linux/thread_info.h>
  21#include <linux/uaccess.h>
  22#include <linux/mman.h>
  23#include <linux/types.h>
  24#include <linux/err.h>
  25#include <linux/module.h>
  26#include <linux/compat.h>
  27#include <linux/prctl.h>
  28#include <asm/cacheflush.h>
  29#include <asm/traps.h>
  30#include <asm/uaccess.h>
  31#include <asm/unaligned.h>
  32#include <arch/abi.h>
  33#include <arch/spr_def.h>
  34#include <arch/opcode.h>
  35
  36
  37/*
   38 * This file handles unaligned exceptions for tile-Gx. The tilepro's
   39 * unaligned exceptions are handled in single_step.c.
  40 */
  41
  42int unaligned_printk;
  43
  44static int __init setup_unaligned_printk(char *str)
  45{
  46        long val;
  47        if (kstrtol(str, 0, &val) != 0)
  48                return 0;
  49        unaligned_printk = val;
   50        pr_info("Printk for each unaligned data access is %s\n",
  51                unaligned_printk ? "enabled" : "disabled");
  52        return 1;
  53}
  54__setup("unaligned_printk=", setup_unaligned_printk);
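/*
 * Usage example (illustrative): booting with "unaligned_printk=1" on the
 * kernel command line enables a printk for each unaligned access fixup.
 */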
  55
  56unsigned int unaligned_fixup_count;
  57
  58#ifdef __tilegx__
  59
  60/*
   61 * Unaligned data JIT fixup code fragment. Reserved space is 128 bytes.
   62 * The 1st 64-bit word saves the fault PC address, the 2nd word holds the
   63 * faulting instruction bundle, followed by 14 JIT bundles.
  64 */
  65
  66struct unaligned_jit_fragment {
  67        unsigned long       pc;
  68        tilegx_bundle_bits  bundle;
  69        tilegx_bundle_bits  insn[14];
  70};
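
/*
 * Layout check (illustrative): 8 bytes (pc) + 8 bytes (bundle) + 14 * 8
 * bytes (insn) fills the 128-byte reserved space exactly.
 */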
  71
  72/*
   73 * Check if a nop or fnop is at the bundle's X0 pipeline.
  74 */
  75
  76static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
  77{
  78        return (((get_UnaryOpcodeExtension_X0(bundle) ==
  79                  NOP_UNARY_OPCODE_X0) &&
  80                 (get_RRROpcodeExtension_X0(bundle) ==
  81                  UNARY_RRR_0_OPCODE_X0) &&
  82                 (get_Opcode_X0(bundle) ==
  83                  RRR_0_OPCODE_X0)) ||
  84                ((get_UnaryOpcodeExtension_X0(bundle) ==
  85                  FNOP_UNARY_OPCODE_X0) &&
  86                 (get_RRROpcodeExtension_X0(bundle) ==
  87                  UNARY_RRR_0_OPCODE_X0) &&
  88                 (get_Opcode_X0(bundle) ==
  89                  RRR_0_OPCODE_X0)));
  90}
  91
  92/*
  93 * Check if nop or fnop at bundle's pipeline X1.
  94 */
  95
  96static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
  97{
  98        return (((get_UnaryOpcodeExtension_X1(bundle) ==
  99                  NOP_UNARY_OPCODE_X1) &&
 100                 (get_RRROpcodeExtension_X1(bundle) ==
 101                  UNARY_RRR_0_OPCODE_X1) &&
 102                 (get_Opcode_X1(bundle) ==
 103                  RRR_0_OPCODE_X1)) ||
 104                ((get_UnaryOpcodeExtension_X1(bundle) ==
 105                  FNOP_UNARY_OPCODE_X1) &&
 106                 (get_RRROpcodeExtension_X1(bundle) ==
 107                  UNARY_RRR_0_OPCODE_X1) &&
 108                 (get_Opcode_X1(bundle) ==
 109                  RRR_0_OPCODE_X1)));
 110}
 111
 112/*
 113 * Check if nop or fnop at bundle's Y0 pipeline.
 114 */
 115
 116static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
 117{
 118        return (((get_UnaryOpcodeExtension_Y0(bundle) ==
 119                  NOP_UNARY_OPCODE_Y0) &&
 120                 (get_RRROpcodeExtension_Y0(bundle) ==
 121                  UNARY_RRR_1_OPCODE_Y0) &&
 122                 (get_Opcode_Y0(bundle) ==
 123                  RRR_1_OPCODE_Y0)) ||
 124                ((get_UnaryOpcodeExtension_Y0(bundle) ==
 125                  FNOP_UNARY_OPCODE_Y0) &&
 126                 (get_RRROpcodeExtension_Y0(bundle) ==
 127                  UNARY_RRR_1_OPCODE_Y0) &&
 128                 (get_Opcode_Y0(bundle) ==
 129                  RRR_1_OPCODE_Y0)));
 130}
 131
 132/*
 133 * Check if nop or fnop at bundle's pipeline Y1.
 134 */
 135
 136static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
 137{
 138        return (((get_UnaryOpcodeExtension_Y1(bundle) ==
 139                  NOP_UNARY_OPCODE_Y1) &&
 140                 (get_RRROpcodeExtension_Y1(bundle) ==
 141                  UNARY_RRR_1_OPCODE_Y1) &&
 142                 (get_Opcode_Y1(bundle) ==
 143                  RRR_1_OPCODE_Y1)) ||
 144                ((get_UnaryOpcodeExtension_Y1(bundle) ==
 145                  FNOP_UNARY_OPCODE_Y1) &&
 146                 (get_RRROpcodeExtension_Y1(bundle) ==
 147                  UNARY_RRR_1_OPCODE_Y1) &&
 148                 (get_Opcode_Y1(bundle) ==
 149                  RRR_1_OPCODE_Y1)));
 150}
 151
 152/*
 153 * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
 154 */
 155
 156static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
 157{
 158        return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
 159}
 160
 161/*
 162 * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
 163 */
 164
 165static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
 166{
 167        return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
 168}
 169
 170/*
  171 * Find the destination and source registers of the faulting unaligned
  172 * access instruction at X1 or Y2. Also allocate up to 3 scratch registers,
  173 * clob1, clob2 and clob3, which are guaranteed to differ from any register
  174 * used in the fault bundle. *r_alias returns whether instructions other
  175 * than the unaligned load/store share a register with ra, rb or rd.
 176 */
 177
 178static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
 179                      uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
 180                      uint64_t *clob3, bool *r_alias)
 181{
 182        int i;
 183        uint64_t reg;
 184        uint64_t reg_map = 0, alias_reg_map = 0, map;
 185        bool alias;
 186
 187        *ra = -1;
 188        *rb = -1;
 189
 190        if (rd)
 191                *rd = -1;
 192
 193        *clob1 = -1;
 194        *clob2 = -1;
 195        *clob3 = -1;
 196        alias = false;
 197
 198        /*
  199         * Parse the fault bundle, find the potentially used registers and
  200         * mark the corresponding bits in reg_map and alias_reg_map. These 2
  201         * bit maps are used to find the scratch registers and determine if
  202         * there is register aliasing.
 203         */
 204        if (bundle & TILEGX_BUNDLE_MODE_MASK) {  /* Y Mode Bundle. */
 205
 206                reg = get_SrcA_Y2(bundle);
 207                reg_map |= 1ULL << reg;
 208                *ra = reg;
 209                reg = get_SrcBDest_Y2(bundle);
 210                reg_map |= 1ULL << reg;
 211
 212                if (rd) {
 213                        /* Load. */
 214                        *rd = reg;
 215                        alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
 216                } else {
 217                        /* Store. */
 218                        *rb = reg;
 219                        alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
 220                }
 221
 222                if (!is_bundle_y1_nop(bundle)) {
 223                        reg = get_SrcA_Y1(bundle);
 224                        reg_map |= (1ULL << reg);
 225                        map = (1ULL << reg);
 226
 227                        reg = get_SrcB_Y1(bundle);
 228                        reg_map |= (1ULL << reg);
 229                        map |= (1ULL << reg);
 230
 231                        reg = get_Dest_Y1(bundle);
 232                        reg_map |= (1ULL << reg);
 233                        map |= (1ULL << reg);
 234
 235                        if (map & alias_reg_map)
 236                                alias = true;
 237                }
 238
 239                if (!is_bundle_y0_nop(bundle)) {
 240                        reg = get_SrcA_Y0(bundle);
 241                        reg_map |= (1ULL << reg);
 242                        map = (1ULL << reg);
 243
 244                        reg = get_SrcB_Y0(bundle);
 245                        reg_map |= (1ULL << reg);
 246                        map |= (1ULL << reg);
 247
 248                        reg = get_Dest_Y0(bundle);
 249                        reg_map |= (1ULL << reg);
 250                        map |= (1ULL << reg);
 251
 252                        if (map & alias_reg_map)
 253                                alias = true;
 254                }
 255        } else  { /* X Mode Bundle. */
 256
 257                reg = get_SrcA_X1(bundle);
 258                reg_map |= (1ULL << reg);
 259                *ra = reg;
 260                if (rd) {
 261                        /* Load. */
 262                        reg = get_Dest_X1(bundle);
 263                        reg_map |= (1ULL << reg);
 264                        *rd = reg;
 265                        alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
 266                } else {
 267                        /* Store. */
 268                        reg = get_SrcB_X1(bundle);
 269                        reg_map |= (1ULL << reg);
 270                        *rb = reg;
 271                        alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
 272                }
 273
 274                if (!is_bundle_x0_nop(bundle)) {
 275                        reg = get_SrcA_X0(bundle);
 276                        reg_map |= (1ULL << reg);
 277                        map = (1ULL << reg);
 278
 279                        reg = get_SrcB_X0(bundle);
 280                        reg_map |= (1ULL << reg);
 281                        map |= (1ULL << reg);
 282
 283                        reg = get_Dest_X0(bundle);
 284                        reg_map |= (1ULL << reg);
 285                        map |= (1ULL << reg);
 286
 287                        if (map & alias_reg_map)
 288                                alias = true;
 289                }
 290        }
 291
 292        /*
  293         * "alias" indicates whether the unaligned access registers collide
  294         * with others in the same bundle. We simply test the all-register-
  295         * operand (RRR) case and ignore the immediate case. If a bundle has
  296         * no register alias, we may do the fixup in a simpler, faster manner.
  297         * So if an immediate field happens to match a register number, we
  298         * may end up falling back to the generic handling.
 299         */
 300
 301        *r_alias = alias;
 302
 303        /* Flip bits on reg_map. */
 304        reg_map ^= -1ULL;
 305
  306        /* Scan the lower 54 (TREG_SP) bits of reg_map to find 3 set bits. */
 307        for (i = 0; i < TREG_SP; i++) {
 308                if (reg_map & (0x1ULL << i)) {
 309                        if (*clob1 == -1) {
 310                                *clob1 = i;
 311                        } else if (*clob2 == -1) {
 312                                *clob2 = i;
 313                        } else if (*clob3 == -1) {
 314                                *clob3 = i;
 315                                return;
 316                        }
 317                }
 318        }
 319}
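
/*
 * Illustrative example (not in the original source): for an X-mode bundle
 * whose X1 slot is "ld r3, r5" and whose X0 slot is a nop, find_regs()
 * returns *rd = 3 and *ra = 5, sets bits 3 and 5 in reg_map, and then
 * picks the three lowest-numbered registers below TREG_SP that are not in
 * reg_map (here r0, r1 and r2) as clob1/2/3. Since no other pipeline uses
 * a register, *r_alias comes back false.
 */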
 320
 321/*
  322 * Sanity check for registers ra, rb, rd and clob1/2/3. Return true if any
  323 * of them is unexpected.
 324 */
 325
 326static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
 327                       uint64_t clob1, uint64_t clob2,  uint64_t clob3)
 328{
 329        bool unexpected = false;
 330        if ((ra >= 56) && (ra != TREG_ZERO))
 331                unexpected = true;
 332
 333        if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
 334                unexpected = true;
 335
 336        if (rd != -1) {
 337                if ((rd >= 56) && (rd != TREG_ZERO))
 338                        unexpected = true;
 339        } else {
 340                if ((rb >= 56) && (rb != TREG_ZERO))
 341                        unexpected = true;
 342        }
 343        return unexpected;
 344}
 345
 346
 347#define  GX_INSN_X0_MASK   ((1ULL << 31) - 1)
 348#define  GX_INSN_X1_MASK   (((1ULL << 31) - 1) << 31)
 349#define  GX_INSN_Y0_MASK   ((0xFULL << 27) | (0xFFFFFULL))
 350#define  GX_INSN_Y1_MASK   (GX_INSN_Y0_MASK << 31)
 351#define  GX_INSN_Y2_MASK   ((0x7FULL << 51) | (0x7FULL << 20))
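
/*
 * As implied by the definitions above, these masks select the bit field of
 * each pipeline slot within a 64-bit bundle: X0 occupies bits 0-30 and X1
 * bits 31-61, while in Y mode the Y0/Y1/Y2 fields are interleaved (Y2, for
 * example, uses bits 20-26 and 51-57).
 */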
 352
 353#ifdef __LITTLE_ENDIAN
 354#define  GX_INSN_BSWAP(_bundle_)    (_bundle_)
 355#else
 356#define  GX_INSN_BSWAP(_bundle_)    swab64(_bundle_)
 357#endif /* __LITTLE_ENDIAN */
 358
 359/*
  360 * __JIT_CODE(.) creates template bundles in the .rodata.unalign_data
  361 * section. The corresponding static function jit_x#_###(.) generates a
  362 * partial or whole bundle based on the template and the given arguments.
 363 */
 364
 365#define __JIT_CODE(_X_)                                         \
 366        asm (".pushsection .rodata.unalign_data, \"a\"\n"       \
 367             _X_"\n"                                            \
 368             ".popsection\n")
 369
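/*
 * For example (illustrative note, not in the original source): jit_x1_mtspr()
 * below keeps only the X1 slot of its template bundle via GX_INSN_X1_MASK and
 * ORs in the SPR number and source register. Complete bundles are then formed
 * by ORing an X0 half with an X1 half, e.g. "jit_x0_fnop() | jit_x1_iret()".
 */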
 370__JIT_CODE("__unalign_jit_x1_mtspr:   {mtspr 0,  r0}");
 371static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
 372{
 373        extern  tilegx_bundle_bits __unalign_jit_x1_mtspr;
 374        return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
 375                create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
 376}
 377
 378__JIT_CODE("__unalign_jit_x1_mfspr:   {mfspr r0, 0}");
 379static tilegx_bundle_bits  jit_x1_mfspr(int reg, int spr)
 380{
 381        extern  tilegx_bundle_bits __unalign_jit_x1_mfspr;
 382        return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
 383                create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
 384}
 385
 386__JIT_CODE("__unalign_jit_x0_addi:   {addi  r0, r0, 0; iret}");
 387static tilegx_bundle_bits  jit_x0_addi(int rd, int ra, int imm8)
 388{
 389        extern  tilegx_bundle_bits __unalign_jit_x0_addi;
 390        return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
 391                create_Dest_X0(rd) | create_SrcA_X0(ra) |
 392                create_Imm8_X0(imm8);
 393}
 394
 395__JIT_CODE("__unalign_jit_x1_ldna:   {ldna  r0, r0}");
 396static tilegx_bundle_bits  jit_x1_ldna(int rd, int ra)
 397{
 398        extern  tilegx_bundle_bits __unalign_jit_x1_ldna;
 399        return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) &  GX_INSN_X1_MASK) |
 400                create_Dest_X1(rd) | create_SrcA_X1(ra);
 401}
 402
 403__JIT_CODE("__unalign_jit_x0_dblalign:   {dblalign r0, r0 ,r0}");
 404static tilegx_bundle_bits  jit_x0_dblalign(int rd, int ra, int rb)
 405{
 406        extern  tilegx_bundle_bits __unalign_jit_x0_dblalign;
 407        return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
 408                create_Dest_X0(rd) | create_SrcA_X0(ra) |
 409                create_SrcB_X0(rb);
 410}
 411
 412__JIT_CODE("__unalign_jit_x1_iret:   {iret}");
 413static tilegx_bundle_bits  jit_x1_iret(void)
 414{
 415        extern  tilegx_bundle_bits __unalign_jit_x1_iret;
 416        return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
 417}
 418
 419__JIT_CODE("__unalign_jit_x01_fnop:   {fnop;fnop}");
 420static tilegx_bundle_bits  jit_x0_fnop(void)
 421{
 422        extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
 423        return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
 424}
 425
 426static tilegx_bundle_bits  jit_x1_fnop(void)
 427{
 428        extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
 429        return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
 430}
 431
 432__JIT_CODE("__unalign_jit_y2_dummy:   {fnop; fnop; ld zero, sp}");
 433static tilegx_bundle_bits  jit_y2_dummy(void)
 434{
 435        extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
 436        return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
 437}
 438
 439static tilegx_bundle_bits  jit_y1_fnop(void)
 440{
 441        extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
 442        return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
 443}
 444
 445__JIT_CODE("__unalign_jit_x1_st1_add:  {st1_add r1, r0, 0}");
 446static tilegx_bundle_bits  jit_x1_st1_add(int ra, int rb, int imm8)
 447{
 448        extern  tilegx_bundle_bits __unalign_jit_x1_st1_add;
 449        return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
 450                (~create_SrcA_X1(-1)) &
 451                GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
 452                create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
 453}
 454
 455__JIT_CODE("__unalign_jit_x1_st:  {crc32_8 r1, r0, r0; st  r0, r0}");
 456static tilegx_bundle_bits  jit_x1_st(int ra, int rb)
 457{
 458        extern  tilegx_bundle_bits __unalign_jit_x1_st;
 459        return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
 460                create_SrcA_X1(ra) | create_SrcB_X1(rb);
 461}
 462
 463__JIT_CODE("__unalign_jit_x1_st_add:  {st_add  r1, r0, 0}");
 464static tilegx_bundle_bits  jit_x1_st_add(int ra, int rb, int imm8)
 465{
 466        extern  tilegx_bundle_bits __unalign_jit_x1_st_add;
 467        return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
 468                (~create_SrcA_X1(-1)) &
 469                GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
 470                create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
 471}
 472
 473__JIT_CODE("__unalign_jit_x1_ld:  {crc32_8 r1, r0, r0; ld  r0, r0}");
 474static tilegx_bundle_bits  jit_x1_ld(int rd, int ra)
 475{
 476        extern  tilegx_bundle_bits __unalign_jit_x1_ld;
 477        return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
 478                create_Dest_X1(rd) | create_SrcA_X1(ra);
 479}
 480
 481__JIT_CODE("__unalign_jit_x1_ld_add:  {ld_add  r1, r0, 0}");
 482static tilegx_bundle_bits  jit_x1_ld_add(int rd, int ra, int imm8)
 483{
 484        extern  tilegx_bundle_bits __unalign_jit_x1_ld_add;
 485        return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
 486                (~create_Dest_X1(-1)) &
 487                GX_INSN_X1_MASK) | create_Dest_X1(rd) |
 488                create_SrcA_X1(ra) | create_Imm8_X1(imm8);
 489}
 490
 491__JIT_CODE("__unalign_jit_x0_bfexts:  {bfexts r0, r0, 0, 0}");
 492static tilegx_bundle_bits  jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
 493{
 494        extern  tilegx_bundle_bits __unalign_jit_x0_bfexts;
 495        return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
 496                GX_INSN_X0_MASK) |
 497                create_Dest_X0(rd) | create_SrcA_X0(ra) |
 498                create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
 499}
 500
 501__JIT_CODE("__unalign_jit_x0_bfextu:  {bfextu r0, r0, 0, 0}");
 502static tilegx_bundle_bits  jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
 503{
 504        extern  tilegx_bundle_bits __unalign_jit_x0_bfextu;
 505        return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
 506                GX_INSN_X0_MASK) |
 507                create_Dest_X0(rd) | create_SrcA_X0(ra) |
 508                create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
 509}
 510
 511__JIT_CODE("__unalign_jit_x1_addi:  {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
 512static tilegx_bundle_bits  jit_x1_addi(int rd, int ra, int imm8)
 513{
 514        extern  tilegx_bundle_bits __unalign_jit_x1_addi;
 515        return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
 516                create_Dest_X1(rd) | create_SrcA_X1(ra) |
 517                create_Imm8_X1(imm8);
 518}
 519
 520__JIT_CODE("__unalign_jit_x0_shrui:  {shrui r0, r0, 0; iret}");
 521static tilegx_bundle_bits  jit_x0_shrui(int rd, int ra, int imm6)
 522{
 523        extern  tilegx_bundle_bits __unalign_jit_x0_shrui;
 524        return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
 525                GX_INSN_X0_MASK) |
 526                create_Dest_X0(rd) | create_SrcA_X0(ra) |
 527                create_ShAmt_X0(imm6);
 528}
 529
 530__JIT_CODE("__unalign_jit_x0_rotli:  {rotli r0, r0, 0; iret}");
 531static tilegx_bundle_bits  jit_x0_rotli(int rd, int ra, int imm6)
 532{
 533        extern  tilegx_bundle_bits __unalign_jit_x0_rotli;
 534        return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
 535                GX_INSN_X0_MASK) |
 536                create_Dest_X0(rd) | create_SrcA_X0(ra) |
 537                create_ShAmt_X0(imm6);
 538}
 539
 540__JIT_CODE("__unalign_jit_x1_bnezt:  {bnezt r0, __unalign_jit_x1_bnezt}");
 541static tilegx_bundle_bits  jit_x1_bnezt(int ra, int broff)
 542{
 543        extern  tilegx_bundle_bits __unalign_jit_x1_bnezt;
 544        return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
 545                GX_INSN_X1_MASK) |
 546                create_SrcA_X1(ra) | create_BrOff_X1(broff);
 547}
 548
 549#undef __JIT_CODE
 550
 551/*
  552 * This function generates the unaligned fixup JIT.
  553 *
  554 * We first find the unaligned load/store instruction's destination and
  555 * source registers (ra, rb and rd), plus 3 scratch registers, by calling
  556 * find_regs(). The 3 scratch clobbers must not alias any register used in
  557 * the fault bundle. Then the fault bundle is analyzed to determine if it
  558 * is a load or store, its operand width, and any branch or address
  559 * increment. At last the generated JIT is copied into the user JIT area.
 560 */
 561
 562static
 563void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
 564                    int align_ctl)
 565{
 566        struct thread_info *info = current_thread_info();
 567        struct unaligned_jit_fragment frag;
 568        struct unaligned_jit_fragment *jit_code_area;
 569        tilegx_bundle_bits bundle_2 = 0;
 570        /* If bundle_2_enable = false, bundle_2 is fnop/nop operation. */
 571        bool     bundle_2_enable = true;
 572        uint64_t ra, rb, rd = -1, clob1, clob2, clob3;
 573        /*
  574         * Indicates whether the unaligned access
  575         * instruction's registers collide with
  576         * others in the same bundle.
 577         */
 578        bool     alias = false;
 579        bool     load_n_store = true;
 580        bool     load_store_signed = false;
 581        unsigned int  load_store_size = 8;
  582        bool     y1_br = false;  /* True for a branch in the same bundle at Y1. */
 583        int      y1_br_reg = 0;
  584        /* True for a link operation, i.e. jalr or lnk at Y1. */
 585        bool     y1_lr = false;
 586        int      y1_lr_reg = 0;
  587        bool     x1_add = false; /* True for a load/store ADD instruction at X1. */
 588        int      x1_add_imm8 = 0;
 589        bool     unexpected = false;
 590        int      n = 0, k;
 591
 592        jit_code_area =
 593                (struct unaligned_jit_fragment *)(info->unalign_jit_base);
 594
 595        memset((void *)&frag, 0, sizeof(frag));
 596
 597        /* 0: X mode, Otherwise: Y mode. */
 598        if (bundle & TILEGX_BUNDLE_MODE_MASK) {
 599                unsigned int mod, opcode;
 600
 601                if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
 602                    get_RRROpcodeExtension_Y1(bundle) ==
 603                    UNARY_RRR_1_OPCODE_Y1) {
 604
 605                        opcode = get_UnaryOpcodeExtension_Y1(bundle);
 606
 607                        /*
 608                         * Test "jalr", "jalrp", "jr", "jrp" instruction at Y1
 609                         * pipeline.
 610                         */
 611                        switch (opcode) {
 612                        case JALR_UNARY_OPCODE_Y1:
 613                        case JALRP_UNARY_OPCODE_Y1:
 614                                y1_lr = true;
 615                                y1_lr_reg = 55; /* Link register. */
 616                                /* FALLTHROUGH */
 617                        case JR_UNARY_OPCODE_Y1:
 618                        case JRP_UNARY_OPCODE_Y1:
 619                                y1_br = true;
 620                                y1_br_reg = get_SrcA_Y1(bundle);
 621                                break;
 622                        case LNK_UNARY_OPCODE_Y1:
 623                                /* "lnk" at Y1 pipeline. */
 624                                y1_lr = true;
 625                                y1_lr_reg = get_Dest_Y1(bundle);
 626                                break;
 627                        }
 628                }
 629
 630                opcode = get_Opcode_Y2(bundle);
 631                mod = get_Mode(bundle);
 632
 633                /*
  634                 *  bundle_2 is the bundle after turning Y2 into a dummy
  635                 *  operation - ld zero, sp
 636                 */
 637                bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();
 638
  639                /* Make Y1 an fnop if Y1 is a branch or lnk operation. */
 640                if (y1_br || y1_lr) {
 641                        bundle_2 &= ~(GX_INSN_Y1_MASK);
 642                        bundle_2 |= jit_y1_fnop();
 643                }
 644
 645                if (is_y0_y1_nop(bundle_2))
 646                        bundle_2_enable = false;
 647
 648                if (mod == MODE_OPCODE_YC2) {
 649                        /* Store. */
 650                        load_n_store = false;
 651                        load_store_size = 1 << opcode;
 652                        load_store_signed = false;
 653                        find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
 654                                  &clob3, &alias);
 655                        if (load_store_size > 8)
 656                                unexpected = true;
 657                } else {
 658                        /* Load. */
 659                        load_n_store = true;
 660                        if (mod == MODE_OPCODE_YB2) {
 661                                switch (opcode) {
 662                                case LD_OPCODE_Y2:
 663                                        load_store_signed = false;
 664                                        load_store_size = 8;
 665                                        break;
 666                                case LD4S_OPCODE_Y2:
 667                                        load_store_signed = true;
 668                                        load_store_size = 4;
 669                                        break;
 670                                case LD4U_OPCODE_Y2:
 671                                        load_store_signed = false;
 672                                        load_store_size = 4;
 673                                        break;
 674                                default:
 675                                        unexpected = true;
 676                                }
 677                        } else if (mod == MODE_OPCODE_YA2) {
 678                                if (opcode == LD2S_OPCODE_Y2) {
 679                                        load_store_signed = true;
 680                                        load_store_size = 2;
 681                                } else if (opcode == LD2U_OPCODE_Y2) {
 682                                        load_store_signed = false;
 683                                        load_store_size = 2;
 684                                } else
 685                                        unexpected = true;
 686                        } else
 687                                unexpected = true;
 688                        find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
 689                                  &clob3, &alias);
 690                }
 691        } else {
 692                unsigned int opcode;
 693
  694                /* bundle_2 is the bundle after making X1 an "fnop". */
 695                bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();
 696
 697                if (is_x0_x1_nop(bundle_2))
 698                        bundle_2_enable = false;
 699
 700                if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
 701                        opcode = get_UnaryOpcodeExtension_X1(bundle);
 702
 703                        if (get_RRROpcodeExtension_X1(bundle) ==
 704                            UNARY_RRR_0_OPCODE_X1) {
 705                                load_n_store = true;
 706                                find_regs(bundle, &rd, &ra, &rb, &clob1,
 707                                          &clob2, &clob3, &alias);
 708
 709                                switch (opcode) {
 710                                case LD_UNARY_OPCODE_X1:
 711                                        load_store_signed = false;
 712                                        load_store_size = 8;
 713                                        break;
 714                                case LD4S_UNARY_OPCODE_X1:
 715                                        load_store_signed = true;
 716                                        /* FALLTHROUGH */
 717                                case LD4U_UNARY_OPCODE_X1:
 718                                        load_store_size = 4;
 719                                        break;
 720
 721                                case LD2S_UNARY_OPCODE_X1:
 722                                        load_store_signed = true;
 723                                        /* FALLTHROUGH */
 724                                case LD2U_UNARY_OPCODE_X1:
 725                                        load_store_size = 2;
 726                                        break;
 727                                default:
 728                                        unexpected = true;
 729                                }
 730                        } else {
 731                                load_n_store = false;
 732                                load_store_signed = false;
 733                                find_regs(bundle, 0, &ra, &rb,
 734                                          &clob1, &clob2, &clob3,
 735                                          &alias);
 736
 737                                opcode = get_RRROpcodeExtension_X1(bundle);
 738                                switch (opcode) {
 739                                case ST_RRR_0_OPCODE_X1:
 740                                        load_store_size = 8;
 741                                        break;
 742                                case ST4_RRR_0_OPCODE_X1:
 743                                        load_store_size = 4;
 744                                        break;
 745                                case ST2_RRR_0_OPCODE_X1:
 746                                        load_store_size = 2;
 747                                        break;
 748                                default:
 749                                        unexpected = true;
 750                                }
 751                        }
 752                } else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
 753                        load_n_store = true;
 754                        opcode = get_Imm8OpcodeExtension_X1(bundle);
 755                        switch (opcode) {
 756                        case LD_ADD_IMM8_OPCODE_X1:
 757                                load_store_size = 8;
 758                                break;
 759
 760                        case LD4S_ADD_IMM8_OPCODE_X1:
 761                                load_store_signed = true;
 762                                /* FALLTHROUGH */
 763                        case LD4U_ADD_IMM8_OPCODE_X1:
 764                                load_store_size = 4;
 765                                break;
 766
 767                        case LD2S_ADD_IMM8_OPCODE_X1:
 768                                load_store_signed = true;
 769                                /* FALLTHROUGH */
 770                        case LD2U_ADD_IMM8_OPCODE_X1:
 771                                load_store_size = 2;
 772                                break;
 773
 774                        case ST_ADD_IMM8_OPCODE_X1:
 775                                load_n_store = false;
 776                                load_store_size = 8;
 777                                break;
 778                        case ST4_ADD_IMM8_OPCODE_X1:
 779                                load_n_store = false;
 780                                load_store_size = 4;
 781                                break;
 782                        case ST2_ADD_IMM8_OPCODE_X1:
 783                                load_n_store = false;
 784                                load_store_size = 2;
 785                                break;
 786                        default:
 787                                unexpected = true;
 788                        }
 789
 790                        if (!unexpected) {
 791                                x1_add = true;
 792                                if (load_n_store)
 793                                        x1_add_imm8 = get_Imm8_X1(bundle);
 794                                else
 795                                        x1_add_imm8 = get_Dest_Imm8_X1(bundle);
 796                        }
 797
 798                        find_regs(bundle, load_n_store ? (&rd) : NULL,
 799                                  &ra, &rb, &clob1, &clob2, &clob3, &alias);
 800                } else
 801                        unexpected = true;
 802        }
 803
 804        /*
  805         * Sanity-check the register numbers extracted from the fault bundle.
 806         */
 807        if (check_regs(rd, ra, rb, clob1, clob2, clob3) == true)
 808                unexpected = true;
 809
  810        /* Warn if register ra actually holds an aligned address. */
 811        if (!unexpected)
 812                WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));
 813
 814
 815        /*
  816         * If the fault came from kernel space, we only need to take care of
  817         * the unaligned "get_user/put_user" macros defined in "uaccess.h".
  818         * Basically, we will handle bundles like this:
  819         * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
  820         * (Refer to file "arch/tile/include/asm/uaccess.h" for details).
  821         * For either load or store, a byte-wise operation is performed by
  822         * calling get_user() or put_user(). If the macro returns a non-zero
  823         * value, that value is placed in rx; otherwise rx is set to zero.
  824         * Finally make pc point to the next bundle and return.
 825         */
 826
 827        if (EX1_PL(regs->ex1) != USER_PL) {
 828
 829                unsigned long rx = 0;
 830                unsigned long x = 0, ret = 0;
 831
 832                if (y1_br || y1_lr || x1_add ||
 833                    (load_store_signed !=
 834                     (load_n_store && load_store_size == 4))) {
  835                        /* Branch, link, wrong sign-ext or load/store add: unexpected. */
 836                        unexpected = true;
 837                } else if (!unexpected) {
 838                        if (bundle & TILEGX_BUNDLE_MODE_MASK) {
 839                                /*
 840                                 * Fault bundle is Y mode.
 841                                 * Check if the Y1 and Y0 is the form of
 842                                 * { movei rx, 0; nop/fnop }, if yes,
 843                                 * find the rx.
 844                                 */
 845
 846                                if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
 847                                    && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
 848                                    (get_Imm8_Y1(bundle) == 0) &&
 849                                    is_bundle_y0_nop(bundle)) {
 850                                        rx = get_Dest_Y1(bundle);
 851                                } else if ((get_Opcode_Y0(bundle) ==
 852                                            ADDI_OPCODE_Y0) &&
 853                                           (get_SrcA_Y0(bundle) == TREG_ZERO) &&
 854                                           (get_Imm8_Y0(bundle) == 0) &&
 855                                           is_bundle_y1_nop(bundle)) {
 856                                        rx = get_Dest_Y0(bundle);
 857                                } else {
 858                                        unexpected = true;
 859                                }
 860                        } else {
 861                                /*
 862                                 * Fault bundle is X mode.
 863                                 * Check if the X0 is 'movei rx, 0',
 864                                 * if yes, find the rx.
 865                                 */
 866
 867                                if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
 868                                    && (get_Imm8OpcodeExtension_X0(bundle) ==
 869                                        ADDI_IMM8_OPCODE_X0) &&
 870                                    (get_SrcA_X0(bundle) == TREG_ZERO) &&
 871                                    (get_Imm8_X0(bundle) == 0)) {
 872                                        rx = get_Dest_X0(bundle);
 873                                } else {
 874                                        unexpected = true;
 875                                }
 876                        }
 877
 878                        /* rx should be less than 56. */
 879                        if (!unexpected && (rx >= 56))
 880                                unexpected = true;
 881                }
 882
 883                if (!search_exception_tables(regs->pc)) {
 884                        /* No fixup in the exception tables for the pc. */
 885                        unexpected = true;
 886                }
 887
 888                if (unexpected) {
 889                        /* Unexpected unalign kernel fault. */
 890                        struct task_struct *tsk = validate_current();
 891
 892                        bust_spinlocks(1);
 893
 894                        show_regs(regs);
 895
 896                        if (unlikely(tsk->pid < 2)) {
 897                                panic("Kernel unalign fault running %s!",
 898                                      tsk->pid ? "init" : "the idle task");
 899                        }
 900#ifdef SUPPORT_DIE
 901                        die("Oops", regs);
 902#endif
 903                        bust_spinlocks(1);
 904
 905                        do_group_exit(SIGKILL);
 906
 907                } else {
 908                        unsigned long i, b = 0;
 909                        unsigned char *ptr =
 910                                (unsigned char *)regs->regs[ra];
 911                        if (load_n_store) {
 912                                /* handle get_user(x, ptr) */
 913                                for (i = 0; i < load_store_size; i++) {
 914                                        ret = get_user(b, ptr++);
 915                                        if (!ret) {
 916                                                /* Success! update x. */
 917#ifdef __LITTLE_ENDIAN
 918                                                x |= (b << (8 * i));
 919#else
 920                                                x <<= 8;
 921                                                x |= b;
 922#endif /* __LITTLE_ENDIAN */
 923                                        } else {
 924                                                x = 0;
 925                                                break;
 926                                        }
 927                                }
 928
 929                                /* Sign-extend 4-byte loads. */
 930                                if (load_store_size == 4)
 931                                        x = (long)(int)x;
 932
 933                                /* Set register rd. */
 934                                regs->regs[rd] = x;
 935
 936                                /* Set register rx. */
 937                                regs->regs[rx] = ret;
 938
 939                                /* Bump pc. */
 940                                regs->pc += 8;
 941
 942                        } else {
 943                                /* Handle put_user(x, ptr) */
 944                                x = regs->regs[rb];
 945#ifdef __LITTLE_ENDIAN
 946                                b = x;
 947#else
 948                                /*
  949                                 * Swap x in order to store it from low
  950                                 * to high memory, the same as in the
  951                                 * little-endian case.
 952                                 */
 953                                switch (load_store_size) {
 954                                case 8:
 955                                        b = swab64(x);
 956                                        break;
 957                                case 4:
 958                                        b = swab32(x);
 959                                        break;
 960                                case 2:
 961                                        b = swab16(x);
 962                                        break;
 963                                }
 964#endif /* __LITTLE_ENDIAN */
 965                                for (i = 0; i < load_store_size; i++) {
 966                                        ret = put_user(b, ptr++);
 967                                        if (ret)
 968                                                break;
 969                                        /* Success! shift 1 byte. */
 970                                        b >>= 8;
 971                                }
 972                                /* Set register rx. */
 973                                regs->regs[rx] = ret;
 974
 975                                /* Bump pc. */
 976                                regs->pc += 8;
 977                        }
 978                }
 979
 980                unaligned_fixup_count++;
 981
 982                if (unaligned_printk) {
  983                        pr_info("%s/%d. Unalign fixup for kernel access "
  984                                "to userspace %lx.\n",
  985                                current->comm, current->pid, regs->regs[ra]);
 986                }
 987
 988                /* Done! Return to the exception handler. */
 989                return;
 990        }
 991
 992        if ((align_ctl == 0) || unexpected) {
 993                siginfo_t info = {
 994                        .si_signo = SIGBUS,
 995                        .si_code = BUS_ADRALN,
 996                        .si_addr = (unsigned char __user *)0
 997                };
 998                if (unaligned_printk)
  999                        pr_info("Unalign bundle: unexp @%llx, %llx\n",
1000                                (unsigned long long)regs->pc,
1001                                (unsigned long long)bundle);
1002
1003                if (ra < 56) {
1004                        unsigned long uaa = (unsigned long)regs->regs[ra];
1005                        /* Set bus Address. */
1006                        info.si_addr = (unsigned char __user *)uaa;
1007                }
1008
1009                unaligned_fixup_count++;
1010
1011                trace_unhandled_signal("unaligned fixup trap", regs,
1012                                       (unsigned long)info.si_addr, SIGBUS);
1013                force_sig_info(info.si_signo, &info, current);
1014                return;
1015        }
1016
1017#ifdef __LITTLE_ENDIAN
1018#define UA_FIXUP_ADDR_DELTA          1
1019#define UA_FIXUP_BFEXT_START(_B_)    0
1020#define UA_FIXUP_BFEXT_END(_B_)     (8 * (_B_) - 1)
1021#else /* __BIG_ENDIAN */
1022#define UA_FIXUP_ADDR_DELTA          -1
1023#define UA_FIXUP_BFEXT_START(_B_)   (64 - 8 * (_B_))
1024#define UA_FIXUP_BFEXT_END(_B_)      63
1025#endif /* __LITTLE_ENDIAN */
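
/*
 * Reading of the macros above (illustrative note): on little-endian the
 * byte-wise JIT walks the address upward (delta +1) and extracts the loaded
 * value starting at bit 0, while on big-endian the address walks downward
 * and the significant bits sit at the top of the register.
 */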
1026
1027
1028
1029        if ((ra != rb) && (rd != TREG_SP) && !alias &&
1030            !y1_br && !y1_lr && !x1_add) {
1031                /*
1032                 * Simple case: ra != rb and no register alias found,
 1033                 * and no branch or link. This covers the majority of cases.
 1034                 * We can do a little better for this simple case than the
 1035                 * generic scheme below.
1036                 */
1037                if (!load_n_store) {
1038                        /*
1039                         * Simple store: ra != rb, no need for scratch register.
 1040                         * Just store a byte and rotate rb right bytewise each time.
1041                         */
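                        /*
                         * Illustrative sequence for a 4-byte little-endian
                         * store, as generated by the code below:
                         *   4 x { rotli rb, rb, 56 ; st1_add ra, rb, 1 }
                         *   { rotli rb, rb, 32 ; addi ra, ra, -4 }
                         *   [ rest of the original bundle, if any ]
                         *   { fnop ; iret }
                         */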
1042#ifdef __BIG_ENDIAN
1043                        frag.insn[n++] =
1044                                jit_x0_addi(ra, ra, load_store_size - 1) |
1045                                jit_x1_fnop();
1046#endif /* __BIG_ENDIAN */
1047                        for (k = 0; k < load_store_size; k++) {
1048                                /* Store a byte. */
1049                                frag.insn[n++] =
1050                                        jit_x0_rotli(rb, rb, 56) |
1051                                        jit_x1_st1_add(ra, rb,
1052                                                       UA_FIXUP_ADDR_DELTA);
1053                        }
1054#ifdef __BIG_ENDIAN
1055                        frag.insn[n] = jit_x1_addi(ra, ra, 1);
1056#else
1057                        frag.insn[n] = jit_x1_addi(ra, ra,
1058                                                   -1 * load_store_size);
1059#endif /* __LITTLE_ENDIAN */
1060
1061                        if (load_store_size == 8) {
1062                                frag.insn[n] |= jit_x0_fnop();
1063                        } else if (load_store_size == 4) {
1064                                frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
1065                        } else { /* = 2 */
1066                                frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
1067                        }
1068                        n++;
1069                        if (bundle_2_enable)
1070                                frag.insn[n++] = bundle_2;
1071                        frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1072                } else {
1073                        if (rd == ra) {
1074                                /* Use two clobber registers: clob1/2. */
1075                                frag.insn[n++] =
1076                                        jit_x0_addi(TREG_SP, TREG_SP, -16) |
1077                                        jit_x1_fnop();
1078                                frag.insn[n++] =
1079                                        jit_x0_addi(clob1, ra, 7) |
1080                                        jit_x1_st_add(TREG_SP, clob1, -8);
1081                                frag.insn[n++] =
1082                                        jit_x0_addi(clob2, ra, 0) |
1083                                        jit_x1_st(TREG_SP, clob2);
1084                                frag.insn[n++] =
1085                                        jit_x0_fnop() |
1086                                        jit_x1_ldna(rd, ra);
1087                                frag.insn[n++] =
1088                                        jit_x0_fnop() |
1089                                        jit_x1_ldna(clob1, clob1);
1090                                /*
 1091                                 * Note: we must make sure that rd is not
 1092                                 * sp. Recover clob1/2 from the stack.
1093                                 */
1094                                frag.insn[n++] =
1095                                        jit_x0_dblalign(rd, clob1, clob2) |
1096                                        jit_x1_ld_add(clob2, TREG_SP, 8);
1097                                frag.insn[n++] =
1098                                        jit_x0_fnop() |
1099                                        jit_x1_ld_add(clob1, TREG_SP, 16);
1100                        } else {
1101                                /* Use one clobber register: clob1 only. */
1102                                frag.insn[n++] =
1103                                        jit_x0_addi(TREG_SP, TREG_SP, -16) |
1104                                        jit_x1_fnop();
1105                                frag.insn[n++] =
1106                                        jit_x0_addi(clob1, ra, 7) |
1107                                        jit_x1_st(TREG_SP, clob1);
1108                                frag.insn[n++] =
1109                                        jit_x0_fnop() |
1110                                        jit_x1_ldna(rd, ra);
1111                                frag.insn[n++] =
1112                                        jit_x0_fnop() |
1113                                        jit_x1_ldna(clob1, clob1);
1114                                /*
 1115                                 * Note: we must make sure that rd is not
 1116                                 * sp. Recover clob1 from the stack.
1117                                 */
1118                                frag.insn[n++] =
1119                                        jit_x0_dblalign(rd, clob1, ra) |
1120                                        jit_x1_ld_add(clob1, TREG_SP, 16);
1121                        }
1122
1123                        if (bundle_2_enable)
1124                                frag.insn[n++] = bundle_2;
1125                        /*
 1126                         * For a non-8-byte load, extract the corresponding
 1127                         * bytes and sign-extend if needed.
1128                         */
1129                        if (load_store_size == 4) {
1130                                if (load_store_signed)
1131                                        frag.insn[n++] =
1132                                                jit_x0_bfexts(
1133                                                        rd, rd,
1134                                                        UA_FIXUP_BFEXT_START(4),
1135                                                        UA_FIXUP_BFEXT_END(4)) |
1136                                                jit_x1_fnop();
1137                                else
1138                                        frag.insn[n++] =
1139                                                jit_x0_bfextu(
1140                                                        rd, rd,
1141                                                        UA_FIXUP_BFEXT_START(4),
1142                                                        UA_FIXUP_BFEXT_END(4)) |
1143                                                jit_x1_fnop();
1144                        } else if (load_store_size == 2) {
1145                                if (load_store_signed)
1146                                        frag.insn[n++] =
1147                                                jit_x0_bfexts(
1148                                                        rd, rd,
1149                                                        UA_FIXUP_BFEXT_START(2),
1150                                                        UA_FIXUP_BFEXT_END(2)) |
1151                                                jit_x1_fnop();
1152                                else
1153                                        frag.insn[n++] =
1154                                                jit_x0_bfextu(
1155                                                        rd, rd,
1156                                                        UA_FIXUP_BFEXT_START(2),
1157                                                        UA_FIXUP_BFEXT_END(2)) |
1158                                                jit_x1_fnop();
1159                        }
1160
1161                        frag.insn[n++] =
1162                                jit_x0_fnop()  |
1163                                jit_x1_iret();
1164                }
1165        } else if (!load_n_store) {
1166
1167                /*
1168                 * Generic memory store cases: use 3 clobber registers.
1169                 *
 1170                 * Alloc space for saving clob2, clob1 and clob3 on the
 1171                 * user's stack. Register clob3 points to where clob2 is
 1172                 * saved, followed by clob1 and clob3 from high to low memory.
1173                 */
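                /*
                 * Resulting stack layout (illustrative, relative to the
                 * decremented sp):
                 *   [sp + 0]  original clob3
                 *   [sp + 8]  original clob1
                 *   [sp + 16] original clob2   <- new clob3 points here
                 * The three st_add's also bump sp back to its original value,
                 * and the epilogue below reloads clob2/clob1/clob3 via clob3.
                 */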
1174                frag.insn[n++] =
1175                        jit_x0_addi(TREG_SP, TREG_SP, -32)    |
1176                        jit_x1_fnop();
1177                frag.insn[n++] =
1178                        jit_x0_addi(clob3, TREG_SP, 16)  |
1179                        jit_x1_st_add(TREG_SP, clob3, 8);
1180#ifdef __LITTLE_ENDIAN
1181                frag.insn[n++] =
1182                        jit_x0_addi(clob1, ra, 0)   |
1183                        jit_x1_st_add(TREG_SP, clob1, 8);
1184#else
1185                frag.insn[n++] =
1186                        jit_x0_addi(clob1, ra, load_store_size - 1)   |
1187                        jit_x1_st_add(TREG_SP, clob1, 8);
1188#endif
1189                if (load_store_size == 8) {
1190                        /*
 1191                         * We store one byte at a time, not for speed but
 1192                         * for compact code. After each store the data source
 1193                         * register is rotated right a byte, so it's unchanged after 8 stores.
1194                         */
1195                        frag.insn[n++] =
1196                                jit_x0_addi(clob2, TREG_ZERO, 7)     |
1197                                jit_x1_st_add(TREG_SP, clob2, 16);
1198                        frag.insn[n++] =
1199                                jit_x0_rotli(rb, rb, 56)      |
1200                                jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1201                        frag.insn[n++] =
1202                                jit_x0_addi(clob2, clob2, -1) |
1203                                jit_x1_bnezt(clob2, -1);
1204                        frag.insn[n++] =
1205                                jit_x0_fnop()                 |
1206                                jit_x1_addi(clob2, y1_br_reg, 0);
1207                } else if (load_store_size == 4) {
1208                        frag.insn[n++] =
1209                                jit_x0_addi(clob2, TREG_ZERO, 3)     |
1210                                jit_x1_st_add(TREG_SP, clob2, 16);
1211                        frag.insn[n++] =
1212                                jit_x0_rotli(rb, rb, 56)      |
1213                                jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1214                        frag.insn[n++] =
1215                                jit_x0_addi(clob2, clob2, -1) |
1216                                jit_x1_bnezt(clob2, -1);
1217                        /*
 1218                         * Same as the 8-byte case, but rb needs another
 1219                         * 4-byte rotate to be recovered after a 4-byte store.
1220                         */
1221                        frag.insn[n++] = jit_x0_rotli(rb, rb, 32)      |
1222                                jit_x1_addi(clob2, y1_br_reg, 0);
1223                } else { /* =2 */
1224                        frag.insn[n++] =
1225                                jit_x0_addi(clob2, rb, 0)     |
1226                                jit_x1_st_add(TREG_SP, clob2, 16);
1227                        for (k = 0; k < 2; k++) {
1228                                frag.insn[n++] =
1229                                        jit_x0_shrui(rb, rb, 8)  |
1230                                        jit_x1_st1_add(clob1, rb,
1231                                                       UA_FIXUP_ADDR_DELTA);
1232                        }
1233                        frag.insn[n++] =
1234                                jit_x0_addi(rb, clob2, 0)       |
1235                                jit_x1_addi(clob2, y1_br_reg, 0);
1236                }
1237
1238                if (bundle_2_enable)
1239                        frag.insn[n++] = bundle_2;
1240
1241                if (y1_lr) {
1242                        frag.insn[n++] =
1243                                jit_x0_fnop()                    |
1244                                jit_x1_mfspr(y1_lr_reg,
1245                                             SPR_EX_CONTEXT_0_0);
1246                }
1247                if (y1_br) {
1248                        frag.insn[n++] =
1249                                jit_x0_fnop()                    |
1250                                jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1251                                             clob2);
1252                }
1253                if (x1_add) {
1254                        frag.insn[n++] =
1255                                jit_x0_addi(ra, ra, x1_add_imm8) |
1256                                jit_x1_ld_add(clob2, clob3, -8);
1257                } else {
1258                        frag.insn[n++] =
1259                                jit_x0_fnop()                    |
1260                                jit_x1_ld_add(clob2, clob3, -8);
1261                }
1262                frag.insn[n++] =
1263                        jit_x0_fnop()   |
1264                        jit_x1_ld_add(clob1, clob3, -8);
1265                frag.insn[n++] = jit_x0_fnop()   | jit_x1_ld(clob3, clob3);
1266                frag.insn[n++] = jit_x0_fnop()   | jit_x1_iret();
1267
1268        } else {
1269                /*
1270                 * Generic memory load cases.
1271                 *
1272                 * Allocate space for saving clob1, clob2 and clob3 on the
1273                 * user's stack.  Register clob3 points to where clob1 is
1274                 * saved, followed by clob2 and clob3 from high to low memory.
1275                 */
1276
1277                frag.insn[n++] =
1278                        jit_x0_addi(TREG_SP, TREG_SP, -32) |
1279                        jit_x1_fnop();
1280                frag.insn[n++] =
1281                        jit_x0_addi(clob3, TREG_SP, 16) |
1282                        jit_x1_st_add(TREG_SP, clob3, 8);
1283                frag.insn[n++] =
1284                        jit_x0_addi(clob2, ra, 0) |
1285                        jit_x1_st_add(TREG_SP, clob2, 8);
1286
1287                if (y1_br) {
1288                        frag.insn[n++] =
1289                                jit_x0_addi(clob1, y1_br_reg, 0) |
1290                                jit_x1_st_add(TREG_SP, clob1, 16);
1291                } else {
1292                        frag.insn[n++] =
1293                                jit_x0_fnop() |
1294                                jit_x1_st_add(TREG_SP, clob1, 16);
1295                }
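                    /*
                     * A sketch of the scratch area built by the stores above
                     * (offsets relative to the decremented TREG_SP; this assumes
                     * the usual read-before-write semantics within a bundle):
                     *
                     *   TREG_SP + 16 : original clob1    <- new clob3 points here
                     *   TREG_SP +  8 : original clob2
                     *   TREG_SP +  0 : original clob3
                     *
                     * The st_add post-increments (8 + 8 + 16) restore TREG_SP, so
                     * only the ld_add/ld bundles at the end of the fragment are
                     * needed to reload the clobbered registers.
                     */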
1296
1297                if (bundle_2_enable)
1298                        frag.insn[n++] = bundle_2;
1299
1300                if (y1_lr) {
1301                        frag.insn[n++] =
1302                                jit_x0_fnop()  |
1303                                jit_x1_mfspr(y1_lr_reg,
1304                                             SPR_EX_CONTEXT_0_0);
1305                }
1306
1307                if (y1_br) {
1308                        frag.insn[n++] =
1309                                jit_x0_fnop() |
1310                                jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1311                                             clob1);
1312                }
1313
1314                frag.insn[n++] =
1315                        jit_x0_addi(clob1, clob2, 7)      |
1316                        jit_x1_ldna(rd, clob2);
1317                frag.insn[n++] =
1318                        jit_x0_fnop()                     |
1319                        jit_x1_ldna(clob1, clob1);
1320                frag.insn[n++] =
1321                        jit_x0_dblalign(rd, clob1, clob2) |
1322                        jit_x1_ld_add(clob1, clob3, -8);
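                    /*
                     * The three bundles above are the classic two-aligned-loads
                     * technique; in rough C terms (a sketch only, assuming
                     * little-endian byte order and ignoring the clobber reload
                     * folded into the dblalign bundle):
                     *
                     *   shift = 8 * (addr & 7);
                     *   lo = *(u64 *)(addr & ~7UL);          ldna rd, clob2
                     *   hi = *(u64 *)((addr + 7) & ~7UL);    ldna clob1, clob1
                     *   rd = shift ? (lo >> shift) | (hi << (64 - shift)) : lo;
                     *
                     * Both ldna loads are naturally aligned and so cannot fault
                     * on alignment; dblalign merges the pair using the low three
                     * bits of the unaligned address ("addr" here) held in clob2.
                     */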
1323                if (x1_add) {
1324                        frag.insn[n++] =
1325                                jit_x0_addi(ra, ra, x1_add_imm8) |
1326                                jit_x1_ld_add(clob2, clob3, -8);
1327                } else {
1328                        frag.insn[n++] =
1329                                jit_x0_fnop()  |
1330                                jit_x1_ld_add(clob2, clob3, -8);
1331                }
1332
1333                frag.insn[n++] =
1334                        jit_x0_fnop() |
1335                        jit_x1_ld(clob3, clob3);
1336
1337                if (load_store_size == 4) {
1338                        if (load_store_signed)
1339                                frag.insn[n++] =
1340                                        jit_x0_bfexts(
1341                                                rd, rd,
1342                                                UA_FIXUP_BFEXT_START(4),
1343                                                UA_FIXUP_BFEXT_END(4)) |
1344                                        jit_x1_fnop();
1345                        else
1346                                frag.insn[n++] =
1347                                        jit_x0_bfextu(
1348                                                rd, rd,
1349                                                UA_FIXUP_BFEXT_START(4),
1350                                                UA_FIXUP_BFEXT_END(4)) |
1351                                        jit_x1_fnop();
1352                } else if (load_store_size == 2) {
1353                        if (load_store_signed)
1354                                frag.insn[n++] =
1355                                        jit_x0_bfexts(
1356                                                rd, rd,
1357                                                UA_FIXUP_BFEXT_START(2),
1358                                                UA_FIXUP_BFEXT_END(2)) |
1359                                        jit_x1_fnop();
1360                        else
1361                                frag.insn[n++] =
1362                                        jit_x0_bfextu(
1363                                                rd, rd,
1364                                                UA_FIXUP_BFEXT_START(2),
1365                                                UA_FIXUP_BFEXT_END(2)) |
1366                                        jit_x1_fnop();
1367                }
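                    /*
                     * The bfexts/bfextu bundles are simply sign or zero extension
                     * of the low 32 or 16 bits of rd; roughly, in C:
                     *
                     *   4 bytes: rd = signed ? (s64)(s32)rd : (u64)(u32)rd;
                     *   2 bytes: rd = signed ? (s64)(s16)rd : (u64)(u16)rd;
                     */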
1368
1369                frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1370        }
1371
1372        /* Max JIT bundle count is 14. */
1373        WARN_ON(n > 14);
1374
1375        if (!unexpected) {
1376                int status = 0;
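                    /*
                     * Slot selection is a simple hash of the faulting bundle:
                     * pc >> 3 is the bundle index (bundles are 8 bytes wide), and
                     * the mask wraps that index into the page's 128-byte
                     * (1 << UNALIGN_JIT_SHIFT) fragment slots, so faulting PCs
                     * that collide simply overwrite each other's fragments.
                     */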
1377                int idx = (regs->pc >> 3) &
1378                        ((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
1379
1380                frag.pc = regs->pc;
1381                frag.bundle = bundle;
1382
1383                if (unaligned_printk) {
1384                        pr_info("%s/%d, Unalign fixup: pc=%lx "
1385                                "bundle=%lx %d %d %d %d %d %d %d %d.",
1386                                current->comm, current->pid,
1387                                (unsigned long)frag.pc,
1388                                (unsigned long)frag.bundle,
1389                                (int)alias, (int)rd, (int)ra,
1390                                (int)rb, (int)bundle_2_enable,
1391                                (int)y1_lr, (int)y1_br, (int)x1_add);
1392
1393                        for (k = 0; k < n; k += 2)
1394                                pr_info("[%d] %016llx %016llx", k,
1395                                        (unsigned long long)frag.insn[k],
1396                                        (unsigned long long)frag.insn[k+1]);
1397                }
1398
1399                /* Swap the bundle byte order on big-endian systems. */
1400#ifdef __BIG_ENDIAN
1401                frag.bundle = GX_INSN_BSWAP(frag.bundle);
1402                for (k = 0; k < n; k++)
1403                        frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
1404#endif /* __BIG_ENDIAN */
1405
1406                status = copy_to_user((void __user *)&jit_code_area[idx],
1407                                      &frag, sizeof(frag));
1408                if (status) {
1409                        /* Failed to copy the JIT to userspace; send SIGSEGV. */
1410                        siginfo_t info = {
1411                                .si_signo = SIGSEGV,
1412                                .si_code = SEGV_MAPERR,
1413                                .si_addr = (void __user *)&jit_code_area[idx]
1414                        };
1415
1416                        pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx",
1417                                current->pid, current->comm,
1418                                (unsigned long long)&jit_code_area[idx]);
1419
1420                        trace_unhandled_signal("segfault in unalign fixup",
1421                                               regs,
1422                                               (unsigned long)info.si_addr,
1423                                               SIGSEGV);
1424                        force_sig_info(info.si_signo, &info, current);
1425                        return;
1426                }
1427
1428
1429                /* Do a cheap, racy increment; the count need not be exact. */
1430                unaligned_fixup_count++;
1431                __flush_icache_range((unsigned long)&jit_code_area[idx],
1432                                     (unsigned long)&jit_code_area[idx] +
1433                                     sizeof(frag));
1434
1435                /* Set up SPR_EX_CONTEXT_0_0/1 for returning to the user program. */
1436                __insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
1437                __insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));
1438
1439                /* Point pc at the start of the new JIT fragment. */
1440                regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
1441                /* Set ICS in SPR_EX_CONTEXT_K_1. */
1442                regs->ex1 = PL_ICS_EX1(USER_PL, 1);
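                    /*
                     * Return path, roughly: the exception handler irets to the
                     * fragment (regs->pc above, with ICS set so it runs without
                     * being interrupted), and the fragment's trailing iret then
                     * consumes the SPR_EX_CONTEXT_0_0/1 values written here,
                     * resuming the user program at the bundle following the
                     * faulting one.
                     */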
1443        }
1444}
1445
1446
1447/*
1448 * C function to generate unalign data JIT. Called from unalign data
1449 * interrupt handler.
1450 *
1451 * First check whether unaligned fixup is disabled, the exception did not
1452 * come from user space, or the sp register points to an unaligned address;
1453 * if so, generate a SIGBUS. Then map a page into user space as the JIT
1454 * area if it is not mapped yet. Generate the JIT code by calling
1455 * jit_bundle_gen(), then return to the exception handler.
1456 *
1457 * The exception handler will "iret" to the newly generated JIT code after
1458 * restoring the caller-saved registers. The JIT code will then perform
1459 * another "iret" to resume the user's program.
1460 */
1461
1462void do_unaligned(struct pt_regs *regs, int vecnum)
1463{
1464        tilegx_bundle_bits __user  *pc;
1465        tilegx_bundle_bits bundle;
1466        struct thread_info *info = current_thread_info();
1467        int align_ctl;
1468
1469        /* Check the per-process unaligned fixup control. */
1470        align_ctl = unaligned_fixup;
1471        switch (task_thread_info(current)->align_ctl) {
1472        case PR_UNALIGN_NOPRINT:
1473                align_ctl = 1;
1474                break;
1475        case PR_UNALIGN_SIGBUS:
1476                align_ctl = 0;
1477                break;
1478        }
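            /*
             * For reference, userspace selects these per-task modes through
             * prctl(2); a minimal sketch (error handling omitted):
             *
             *   #include <sys/prctl.h>
             *
             *   prctl(PR_SET_UNALIGN, PR_UNALIGN_SIGBUS);    always SIGBUS
             *   prctl(PR_SET_UNALIGN, PR_UNALIGN_NOPRINT);   fix up silently
             */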
1479
1480        /* Enable interrupts in order to access userspace. */
1481        local_irq_enable();
1482
1483        /*
1484         * If the fault came from kernel space, there are two choices:
1485         * (a) unaligned_fixup < 1: first try the get_user/put_user exception
1486         *     fixup to return -EFAULT; if there is no fixup, panic the kernel.
1487         * (b) unaligned_fixup >= 1: try to fix the unaligned access if it was
1488         *     triggered by the get_user/put_user() macros, and panic the
1489         *     kernel if it is not fixable.
1490         */
1491
1492        if (EX1_PL(regs->ex1) != USER_PL) {
1493
1494                if (align_ctl < 1) {
1495                        unaligned_fixup_count++;
1496                        /* If exception came from kernel, try fix it up. */
1497                        if (fixup_exception(regs)) {
1498                                if (unaligned_printk)
1499                                        pr_info("Unalign fixup: %d %llx @%llx",
1500                                                (int)unaligned_fixup,
1501                                                (unsigned long long)regs->ex1,
1502                                                (unsigned long long)regs->pc);
1503                                return;
1504                        }
1505                        /* Not fixable. Go panic. */
1506                        panic("Unalign exception in Kernel. pc=%lx",
1507                              regs->pc);
1508                        return;
1509                } else {
1510                        /*
1511                         * Try to fix the exception. If we can't, panic the
1512                         * kernel.
1513                         */
1514                        bundle = GX_INSN_BSWAP(
1515                                *((tilegx_bundle_bits *)(regs->pc)));
1516                        jit_bundle_gen(regs, bundle, align_ctl);
1517                        return;
1518                }
1519        }
1520
1521        /*
1522         * If the fault came from user space with ICS set, the stack pointer
1523         * is not 8-byte aligned, or unaligned fixup is disabled, send SIGBUS.
1524         */
1525        if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
1526                siginfo_t info = {
1527                        .si_signo = SIGBUS,
1528                        .si_code = BUS_ADRALN,
1529                        .si_addr = (unsigned char __user *)0
1530                };
1531
1532                if (unaligned_printk)
1533                        pr_info("Unalign fixup: %d %llx @%llx",
1534                                (int)unaligned_fixup,
1535                                (unsigned long long)regs->ex1,
1536                                (unsigned long long)regs->pc);
1537
1538                unaligned_fixup_count++;
1539
1540                trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
1541                force_sig_info(info.si_signo, &info, current);
1542                return;
1543        }
1544
1545
1546        /* Read the bundle that caused the exception. */
1547        pc = (tilegx_bundle_bits __user *)(regs->pc);
1548        if (get_user(bundle, pc) != 0) {
1549                /* Should rarely get here, since pc is a valid user address. */
1550                siginfo_t info = {
1551                        .si_signo = SIGSEGV,
1552                        .si_code = SEGV_MAPERR,
1553                        .si_addr = (void __user *)pc
1554                };
1555                pr_err("Couldn't read instruction at %p trying to step\n", pc);
1556                trace_unhandled_signal("segfault in unalign fixup", regs,
1557                                       (unsigned long)info.si_addr, SIGSEGV);
1558                force_sig_info(info.si_signo, &info, current);
1559                return;
1560        }
1561
1562        if (!info->unalign_jit_base) {
1563                void __user *user_page;
1564
1565                /*
1566                 * Allocate a page in userland.
1567                 * For 64-bit processes we try to place the mapping far
1568                 * from anything else that might be going on (specifically
1569                 * 64 GB below the top of the user address space).  If it
1570                 * happens not to be possible to put it there, it's OK;
1571                 * the kernel will choose another location and we'll
1572                 * remember it for later.
1573                 */
1574                if (is_compat_task())
1575                        user_page = NULL;
1576                else
1577                        user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
1578                                (current->pid << PAGE_SHIFT);
1579
1580                user_page = (void __user *) vm_mmap(NULL,
1581                                                    (unsigned long)user_page,
1582                                                    PAGE_SIZE,
1583                                                    PROT_EXEC | PROT_READ |
1584                                                    PROT_WRITE,
1585#ifdef CONFIG_HOMECACHE
1586                                                    MAP_CACHE_HOME_TASK |
1587#endif
1588                                                    MAP_PRIVATE |
1589                                                    MAP_ANONYMOUS,
1590                                                    0);
1591
1592                if (IS_ERR((void __force *)user_page)) {
1593                        pr_err("Out of kernel pages trying do_mmap.\n");
1594                        return;
1595                }
1596
1597                /* Save the address in the thread_info struct */
1598                info->unalign_jit_base = user_page;
1599                if (unaligned_printk)
1600                        pr_info("Unalign bundle: %d:%d, allocate page @%llx",
1601                                raw_smp_processor_id(), current->pid,
1602                                (unsigned long long)user_page);
1603        }
1604
1605        /* Generate unalign JIT */
1606        jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
1607}
1608
1609#endif /* __tilegx__ */
1610