linux/arch/tile/kernel/unaligned.c
   1/*
   2 * Copyright 2013 Tilera Corporation. All Rights Reserved.
   3 *
   4 *   This program is free software; you can redistribute it and/or
   5 *   modify it under the terms of the GNU General Public License
   6 *   as published by the Free Software Foundation, version 2.
   7 *
   8 *   This program is distributed in the hope that it will be useful, but
   9 *   WITHOUT ANY WARRANTY; without even the implied warranty of
  10 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  11 *   NON INFRINGEMENT.  See the GNU General Public License for
  12 *   more details.
  13 *
   14 * A code-rewriter that handles unaligned exceptions.
  15 */
  16
  17#include <linux/smp.h>
  18#include <linux/ptrace.h>
  19#include <linux/slab.h>
  20#include <linux/thread_info.h>
  21#include <linux/uaccess.h>
  22#include <linux/mman.h>
  23#include <linux/types.h>
  24#include <linux/err.h>
  25#include <linux/module.h>
  26#include <linux/compat.h>
  27#include <linux/prctl.h>
  28#include <asm/cacheflush.h>
  29#include <asm/traps.h>
  30#include <asm/uaccess.h>
  31#include <asm/unaligned.h>
  32#include <arch/abi.h>
  33#include <arch/spr_def.h>
  34#include <arch/opcode.h>
  35
  36
  37/*
   38 * This file handles unaligned exceptions for tile-Gx. The tilepro's unaligned
   39 * exceptions are handled in single_step.c.
  40 */
  41
  42int unaligned_printk;
  43
  44static int __init setup_unaligned_printk(char *str)
  45{
  46        long val;
  47        if (kstrtol(str, 0, &val) != 0)
  48                return 0;
  49        unaligned_printk = val;
   50        pr_info("Printk for each unaligned data access is %s\n",
  51                unaligned_printk ? "enabled" : "disabled");
  52        return 1;
  53}
  54__setup("unaligned_printk=", setup_unaligned_printk);
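/*
 * Usage note: "unaligned_printk" is a kernel command-line parameter; for
 * example, booting with "unaligned_printk=1" enables the per-access
 * pr_info() message above, and "unaligned_printk=0" (the default) disables it.
 */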
  55
  56unsigned int unaligned_fixup_count;
  57
  58#ifdef __tilegx__
  59
  60/*
   61 * Unaligned data JIT fixup code fragment. Reserved space is 128 bytes.
   62 * The 1st 64-bit word saves the fault PC address, the 2nd word is the fault
   63 * instruction bundle, followed by 14 JIT bundles.
  64 */
  65
  66struct unaligned_jit_fragment {
  67        unsigned long       pc;
  68        tilegx_bundle_bits  bundle;
  69        tilegx_bundle_bits  insn[14];
  70};
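/*
 * Layout check: 8 bytes (pc) + 8 bytes (bundle) + 14 * 8 bytes (insn[])
 * adds up to the 128 bytes of reserved space described above.
 */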
  71
  72/*
   73 * Check if there is a nop or fnop in the bundle's X0 pipeline.
  74 */
  75
  76static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
  77{
  78        return (((get_UnaryOpcodeExtension_X0(bundle) ==
  79                  NOP_UNARY_OPCODE_X0) &&
  80                 (get_RRROpcodeExtension_X0(bundle) ==
  81                  UNARY_RRR_0_OPCODE_X0) &&
  82                 (get_Opcode_X0(bundle) ==
  83                  RRR_0_OPCODE_X0)) ||
  84                ((get_UnaryOpcodeExtension_X0(bundle) ==
  85                  FNOP_UNARY_OPCODE_X0) &&
  86                 (get_RRROpcodeExtension_X0(bundle) ==
  87                  UNARY_RRR_0_OPCODE_X0) &&
  88                 (get_Opcode_X0(bundle) ==
  89                  RRR_0_OPCODE_X0)));
  90}
  91
  92/*
   93 * Check if there is a nop or fnop in the bundle's X1 pipeline.
  94 */
  95
  96static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
  97{
  98        return (((get_UnaryOpcodeExtension_X1(bundle) ==
  99                  NOP_UNARY_OPCODE_X1) &&
 100                 (get_RRROpcodeExtension_X1(bundle) ==
 101                  UNARY_RRR_0_OPCODE_X1) &&
 102                 (get_Opcode_X1(bundle) ==
 103                  RRR_0_OPCODE_X1)) ||
 104                ((get_UnaryOpcodeExtension_X1(bundle) ==
 105                  FNOP_UNARY_OPCODE_X1) &&
 106                 (get_RRROpcodeExtension_X1(bundle) ==
 107                  UNARY_RRR_0_OPCODE_X1) &&
 108                 (get_Opcode_X1(bundle) ==
 109                  RRR_0_OPCODE_X1)));
 110}
 111
 112/*
  113 * Check if there is a nop or fnop in the bundle's Y0 pipeline.
 114 */
 115
 116static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
 117{
 118        return (((get_UnaryOpcodeExtension_Y0(bundle) ==
 119                  NOP_UNARY_OPCODE_Y0) &&
 120                 (get_RRROpcodeExtension_Y0(bundle) ==
 121                  UNARY_RRR_1_OPCODE_Y0) &&
 122                 (get_Opcode_Y0(bundle) ==
 123                  RRR_1_OPCODE_Y0)) ||
 124                ((get_UnaryOpcodeExtension_Y0(bundle) ==
 125                  FNOP_UNARY_OPCODE_Y0) &&
 126                 (get_RRROpcodeExtension_Y0(bundle) ==
 127                  UNARY_RRR_1_OPCODE_Y0) &&
 128                 (get_Opcode_Y0(bundle) ==
 129                  RRR_1_OPCODE_Y0)));
 130}
 131
 132/*
  133 * Check if there is a nop or fnop in the bundle's Y1 pipeline.
 134 */
 135
 136static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
 137{
 138        return (((get_UnaryOpcodeExtension_Y1(bundle) ==
 139                  NOP_UNARY_OPCODE_Y1) &&
 140                 (get_RRROpcodeExtension_Y1(bundle) ==
 141                  UNARY_RRR_1_OPCODE_Y1) &&
 142                 (get_Opcode_Y1(bundle) ==
 143                  RRR_1_OPCODE_Y1)) ||
 144                ((get_UnaryOpcodeExtension_Y1(bundle) ==
 145                  FNOP_UNARY_OPCODE_Y1) &&
 146                 (get_RRROpcodeExtension_Y1(bundle) ==
 147                  UNARY_RRR_1_OPCODE_Y1) &&
 148                 (get_Opcode_Y1(bundle) ==
 149                  RRR_1_OPCODE_Y1)));
 150}
 151
 152/*
  153 * Test if a bundle's Y0 and Y1 pipelines are both nop or fnop.
 154 */
 155
 156static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
 157{
 158        return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
 159}
 160
 161/*
  162 * Test if a bundle's X0 and X1 pipelines are both nop or fnop.
 163 */
 164
 165static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
 166{
 167        return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
 168}
 169
 170/*
  171 * Find the destination and source registers of the faulting unaligned access
  172 * instruction at X1 or Y2. Also allocate up to 3 scratch registers (clob1,
  173 * clob2 and clob3), which are guaranteed to differ from any register used in
  174 * the fault bundle. r_alias reports whether any instruction other than the
  175 * unaligned load/store shares a register with ra, rb or rd.
 176 */
 177
 178static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
 179                      uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
 180                      uint64_t *clob3, bool *r_alias)
 181{
 182        int i;
 183        uint64_t reg;
 184        uint64_t reg_map = 0, alias_reg_map = 0, map;
 185        bool alias = false;
 186
 187        /*
  188         * Parse the fault bundle, find the potentially used registers and mark
  189         * the corresponding bits in reg_map and alias_reg_map. These two bit
  190         * maps are used to find the scratch registers and to determine if there
  191         * is a register alias.
 192         */
 193        if (bundle & TILEGX_BUNDLE_MODE_MASK) {  /* Y Mode Bundle. */
 194
 195                reg = get_SrcA_Y2(bundle);
 196                reg_map |= 1ULL << reg;
 197                *ra = reg;
 198                reg = get_SrcBDest_Y2(bundle);
 199                reg_map |= 1ULL << reg;
 200
 201                if (rd) {
 202                        /* Load. */
 203                        *rd = reg;
 204                        alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
 205                } else {
 206                        /* Store. */
 207                        *rb = reg;
 208                        alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
 209                }
 210
 211                if (!is_bundle_y1_nop(bundle)) {
 212                        reg = get_SrcA_Y1(bundle);
 213                        reg_map |= (1ULL << reg);
 214                        map = (1ULL << reg);
 215
 216                        reg = get_SrcB_Y1(bundle);
 217                        reg_map |= (1ULL << reg);
 218                        map |= (1ULL << reg);
 219
 220                        reg = get_Dest_Y1(bundle);
 221                        reg_map |= (1ULL << reg);
 222                        map |= (1ULL << reg);
 223
 224                        if (map & alias_reg_map)
 225                                alias = true;
 226                }
 227
 228                if (!is_bundle_y0_nop(bundle)) {
 229                        reg = get_SrcA_Y0(bundle);
 230                        reg_map |= (1ULL << reg);
 231                        map = (1ULL << reg);
 232
 233                        reg = get_SrcB_Y0(bundle);
 234                        reg_map |= (1ULL << reg);
 235                        map |= (1ULL << reg);
 236
 237                        reg = get_Dest_Y0(bundle);
 238                        reg_map |= (1ULL << reg);
 239                        map |= (1ULL << reg);
 240
 241                        if (map & alias_reg_map)
 242                                alias = true;
 243                }
 244        } else  { /* X Mode Bundle. */
 245
 246                reg = get_SrcA_X1(bundle);
 247                reg_map |= (1ULL << reg);
 248                *ra = reg;
 249                if (rd) {
 250                        /* Load. */
 251                        reg = get_Dest_X1(bundle);
 252                        reg_map |= (1ULL << reg);
 253                        *rd = reg;
 254                        alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
 255                } else {
 256                        /* Store. */
 257                        reg = get_SrcB_X1(bundle);
 258                        reg_map |= (1ULL << reg);
 259                        *rb = reg;
 260                        alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
 261                }
 262
 263                if (!is_bundle_x0_nop(bundle)) {
 264                        reg = get_SrcA_X0(bundle);
 265                        reg_map |= (1ULL << reg);
 266                        map = (1ULL << reg);
 267
 268                        reg = get_SrcB_X0(bundle);
 269                        reg_map |= (1ULL << reg);
 270                        map |= (1ULL << reg);
 271
 272                        reg = get_Dest_X0(bundle);
 273                        reg_map |= (1ULL << reg);
 274                        map |= (1ULL << reg);
 275
 276                        if (map & alias_reg_map)
 277                                alias = true;
 278                }
 279        }
 280
 281        /*
  282         * "alias" indicates whether the unaligned access registers collide
  283         * with others in the same bundle. We simply test the all-register-
  284         * operand case (RRR) and ignore the immediate case. If a bundle has
  285         * no register alias, we may do the fixup in a simpler and faster way.
  286         * So if an immediate field happens to match a register number, we may
  287         * end up falling back to the generic handling.
 288         */
 289
 290        *r_alias = alias;
 291
 292        /* Flip bits on reg_map. */
 293        reg_map ^= -1ULL;
 294
  295        /* Scan the lower 54 (TREG_SP) bits of reg_map to find 3 set bits. */
 296        for (i = 0; i < TREG_SP; i++) {
 297                if (reg_map & (0x1ULL << i)) {
 298                        if (*clob1 == -1) {
 299                                *clob1 = i;
 300                        } else if (*clob2 == -1) {
 301                                *clob2 = i;
 302                        } else if (*clob3 == -1) {
 303                                *clob3 = i;
 304                                return;
 305                        }
 306                }
 307        }
 308}
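/*
 * Example (illustrative): for an X-mode bundle whose only real instruction
 * is "ld r3, r5", reg_map gets bits 3 and 5 set.  After the bits are
 * flipped, the scan above hands back the three lowest unused registers,
 * r0, r1 and r2, as clob1/clob2/clob3.
 */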
 309
 310/*
  311 * Sanity check for registers ra, rb, rd and clob1/2/3. Return true if any of
  312 * them is unexpected.
 313 */
 314
 315static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
 316                       uint64_t clob1, uint64_t clob2,  uint64_t clob3)
 317{
 318        bool unexpected = false;
 319        if ((ra >= 56) && (ra != TREG_ZERO))
 320                unexpected = true;
 321
 322        if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
 323                unexpected = true;
 324
 325        if (rd != -1) {
 326                if ((rd >= 56) && (rd != TREG_ZERO))
 327                        unexpected = true;
 328        } else {
 329                if ((rb >= 56) && (rb != TREG_ZERO))
 330                        unexpected = true;
 331        }
 332        return unexpected;
 333}
 334
 335
 336#define  GX_INSN_X0_MASK   ((1ULL << 31) - 1)
 337#define  GX_INSN_X1_MASK   (((1ULL << 31) - 1) << 31)
 338#define  GX_INSN_Y0_MASK   ((0xFULL << 27) | (0xFFFFFULL))
 339#define  GX_INSN_Y1_MASK   (GX_INSN_Y0_MASK << 31)
 340#define  GX_INSN_Y2_MASK   ((0x7FULL << 51) | (0x7FULL << 20))
 341
 342#ifdef __LITTLE_ENDIAN
 343#define  GX_INSN_BSWAP(_bundle_)    (_bundle_)
 344#else
 345#define  GX_INSN_BSWAP(_bundle_)    swab64(_bundle_)
 346#endif /* __LITTLE_ENDIAN */
 347
 348/*
 349 * __JIT_CODE(.) creates template bundles in .rodata.unalign_data section.
  350 * The corresponding static function jit_x#_###(.) generates a partial or
 351 * whole bundle based on the template and given arguments.
 352 */
 353
 354#define __JIT_CODE(_X_)                                         \
 355        asm (".pushsection .rodata.unalign_data, \"a\"\n"       \
 356             _X_"\n"                                            \
 357             ".popsection\n")
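/*
 * For instance, jit_x1_mtspr() below takes the template bundle emitted by
 * __JIT_CODE("__unalign_jit_x1_mtspr: {mtspr 0, r0}"), masks it down to the
 * X1 slot with GX_INSN_X1_MASK, and ORs in the spr and source-register
 * fields via create_MT_Imm14_X1() and create_SrcA_X1().
 */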
 358
 359__JIT_CODE("__unalign_jit_x1_mtspr:   {mtspr 0,  r0}");
 360static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
 361{
 362        extern  tilegx_bundle_bits __unalign_jit_x1_mtspr;
 363        return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
 364                create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
 365}
 366
 367__JIT_CODE("__unalign_jit_x1_mfspr:   {mfspr r0, 0}");
 368static tilegx_bundle_bits  jit_x1_mfspr(int reg, int spr)
 369{
 370        extern  tilegx_bundle_bits __unalign_jit_x1_mfspr;
 371        return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
 372                create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
 373}
 374
 375__JIT_CODE("__unalign_jit_x0_addi:   {addi  r0, r0, 0; iret}");
 376static tilegx_bundle_bits  jit_x0_addi(int rd, int ra, int imm8)
 377{
 378        extern  tilegx_bundle_bits __unalign_jit_x0_addi;
 379        return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
 380                create_Dest_X0(rd) | create_SrcA_X0(ra) |
 381                create_Imm8_X0(imm8);
 382}
 383
 384__JIT_CODE("__unalign_jit_x1_ldna:   {ldna  r0, r0}");
 385static tilegx_bundle_bits  jit_x1_ldna(int rd, int ra)
 386{
 387        extern  tilegx_bundle_bits __unalign_jit_x1_ldna;
 388        return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) &  GX_INSN_X1_MASK) |
 389                create_Dest_X1(rd) | create_SrcA_X1(ra);
 390}
 391
 392__JIT_CODE("__unalign_jit_x0_dblalign:   {dblalign r0, r0 ,r0}");
 393static tilegx_bundle_bits  jit_x0_dblalign(int rd, int ra, int rb)
 394{
 395        extern  tilegx_bundle_bits __unalign_jit_x0_dblalign;
 396        return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
 397                create_Dest_X0(rd) | create_SrcA_X0(ra) |
 398                create_SrcB_X0(rb);
 399}
 400
 401__JIT_CODE("__unalign_jit_x1_iret:   {iret}");
 402static tilegx_bundle_bits  jit_x1_iret(void)
 403{
 404        extern  tilegx_bundle_bits __unalign_jit_x1_iret;
 405        return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
 406}
 407
 408__JIT_CODE("__unalign_jit_x01_fnop:   {fnop;fnop}");
 409static tilegx_bundle_bits  jit_x0_fnop(void)
 410{
 411        extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
 412        return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
 413}
 414
 415static tilegx_bundle_bits  jit_x1_fnop(void)
 416{
 417        extern  tilegx_bundle_bits __unalign_jit_x01_fnop;
 418        return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
 419}
 420
 421__JIT_CODE("__unalign_jit_y2_dummy:   {fnop; fnop; ld zero, sp}");
 422static tilegx_bundle_bits  jit_y2_dummy(void)
 423{
 424        extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
 425        return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
 426}
 427
 428static tilegx_bundle_bits  jit_y1_fnop(void)
 429{
 430        extern  tilegx_bundle_bits __unalign_jit_y2_dummy;
 431        return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
 432}
 433
 434__JIT_CODE("__unalign_jit_x1_st1_add:  {st1_add r1, r0, 0}");
 435static tilegx_bundle_bits  jit_x1_st1_add(int ra, int rb, int imm8)
 436{
 437        extern  tilegx_bundle_bits __unalign_jit_x1_st1_add;
 438        return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
 439                (~create_SrcA_X1(-1)) &
 440                GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
 441                create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
 442}
 443
 444__JIT_CODE("__unalign_jit_x1_st:  {crc32_8 r1, r0, r0; st  r0, r0}");
 445static tilegx_bundle_bits  jit_x1_st(int ra, int rb)
 446{
 447        extern  tilegx_bundle_bits __unalign_jit_x1_st;
 448        return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
 449                create_SrcA_X1(ra) | create_SrcB_X1(rb);
 450}
 451
 452__JIT_CODE("__unalign_jit_x1_st_add:  {st_add  r1, r0, 0}");
 453static tilegx_bundle_bits  jit_x1_st_add(int ra, int rb, int imm8)
 454{
 455        extern  tilegx_bundle_bits __unalign_jit_x1_st_add;
 456        return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
 457                (~create_SrcA_X1(-1)) &
 458                GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
 459                create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
 460}
 461
 462__JIT_CODE("__unalign_jit_x1_ld:  {crc32_8 r1, r0, r0; ld  r0, r0}");
 463static tilegx_bundle_bits  jit_x1_ld(int rd, int ra)
 464{
 465        extern  tilegx_bundle_bits __unalign_jit_x1_ld;
 466        return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
 467                create_Dest_X1(rd) | create_SrcA_X1(ra);
 468}
 469
 470__JIT_CODE("__unalign_jit_x1_ld_add:  {ld_add  r1, r0, 0}");
 471static tilegx_bundle_bits  jit_x1_ld_add(int rd, int ra, int imm8)
 472{
 473        extern  tilegx_bundle_bits __unalign_jit_x1_ld_add;
 474        return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
 475                (~create_Dest_X1(-1)) &
 476                GX_INSN_X1_MASK) | create_Dest_X1(rd) |
 477                create_SrcA_X1(ra) | create_Imm8_X1(imm8);
 478}
 479
 480__JIT_CODE("__unalign_jit_x0_bfexts:  {bfexts r0, r0, 0, 0}");
 481static tilegx_bundle_bits  jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
 482{
 483        extern  tilegx_bundle_bits __unalign_jit_x0_bfexts;
 484        return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
 485                GX_INSN_X0_MASK) |
 486                create_Dest_X0(rd) | create_SrcA_X0(ra) |
 487                create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
 488}
 489
 490__JIT_CODE("__unalign_jit_x0_bfextu:  {bfextu r0, r0, 0, 0}");
 491static tilegx_bundle_bits  jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
 492{
 493        extern  tilegx_bundle_bits __unalign_jit_x0_bfextu;
 494        return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
 495                GX_INSN_X0_MASK) |
 496                create_Dest_X0(rd) | create_SrcA_X0(ra) |
 497                create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
 498}
 499
 500__JIT_CODE("__unalign_jit_x1_addi:  {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
 501static tilegx_bundle_bits  jit_x1_addi(int rd, int ra, int imm8)
 502{
 503        extern  tilegx_bundle_bits __unalign_jit_x1_addi;
 504        return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
 505                create_Dest_X1(rd) | create_SrcA_X1(ra) |
 506                create_Imm8_X1(imm8);
 507}
 508
 509__JIT_CODE("__unalign_jit_x0_shrui:  {shrui r0, r0, 0; iret}");
 510static tilegx_bundle_bits  jit_x0_shrui(int rd, int ra, int imm6)
 511{
 512        extern  tilegx_bundle_bits __unalign_jit_x0_shrui;
 513        return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
 514                GX_INSN_X0_MASK) |
 515                create_Dest_X0(rd) | create_SrcA_X0(ra) |
 516                create_ShAmt_X0(imm6);
 517}
 518
 519__JIT_CODE("__unalign_jit_x0_rotli:  {rotli r0, r0, 0; iret}");
 520static tilegx_bundle_bits  jit_x0_rotli(int rd, int ra, int imm6)
 521{
 522        extern  tilegx_bundle_bits __unalign_jit_x0_rotli;
 523        return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
 524                GX_INSN_X0_MASK) |
 525                create_Dest_X0(rd) | create_SrcA_X0(ra) |
 526                create_ShAmt_X0(imm6);
 527}
 528
 529__JIT_CODE("__unalign_jit_x1_bnezt:  {bnezt r0, __unalign_jit_x1_bnezt}");
 530static tilegx_bundle_bits  jit_x1_bnezt(int ra, int broff)
 531{
 532        extern  tilegx_bundle_bits __unalign_jit_x1_bnezt;
 533        return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
 534                GX_INSN_X1_MASK) |
 535                create_SrcA_X1(ra) | create_BrOff_X1(broff);
 536}
 537
 538#undef __JIT_CODE
 539
 540/*
  541 * This function generates the unaligned fixup JIT.
  542 *
  543 * We first find the unaligned load/store instruction's destination and
  544 * source registers (ra, rb and rd), plus 3 scratch registers, by calling
  545 * find_regs(). The 3 scratch clobbers must not alias any register used in
  546 * the fault bundle. Then the fault bundle is analyzed to determine whether
  547 * it is a load or store, its operand width, branch or address increment etc.
  548 * Finally the generated JIT is copied into the JIT code area in user space.
 549 */
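/*
 * Rough sketch (illustrative) of the fragment generated for the common case
 * of a simple 8-byte load with rd != ra and no alias, branch, link or add:
 *
 *   { addi sp, sp, -16       ; fnop                  }
 *   { addi clob1, ra, 7      ; st sp, clob1          }
 *   { fnop                   ; ldna rd, ra           }
 *   { fnop                   ; ldna clob1, clob1     }
 *   { dblalign rd, clob1, ra ; ld_add clob1, sp, 16  }
 *   ... the rest of the original bundle, if any ...
 *   { fnop                   ; iret                  }
 */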
 550
 551static
 552void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
 553                    int align_ctl)
 554{
 555        struct thread_info *info = current_thread_info();
 556        struct unaligned_jit_fragment frag;
 557        struct unaligned_jit_fragment *jit_code_area;
 558        tilegx_bundle_bits bundle_2 = 0;
  559        /* If bundle_2_enable is false, bundle_2 contains only fnop/nop operations. */
 560        bool     bundle_2_enable = true;
 561        uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1;
 562        /*
  563         * Indicates whether the unaligned access
  564         * instruction's registers collide with
  565         * others in the same bundle.
 566         */
 567        bool     alias = false;
 568        bool     load_n_store = true;
 569        bool     load_store_signed = false;
 570        unsigned int  load_store_size = 8;
  571        bool     y1_br = false;  /* True for a branch in the same bundle at Y1. */
 572        int      y1_br_reg = 0;
  573        /* True for a link operation, i.e. jalr or lnk at Y1. */
 574        bool     y1_lr = false;
 575        int      y1_lr_reg = 0;
  576        bool     x1_add = false; /* True for a load/store ADD instruction at X1. */
 577        int      x1_add_imm8 = 0;
 578        bool     unexpected = false;
 579        int      n = 0, k;
 580
 581        jit_code_area =
 582                (struct unaligned_jit_fragment *)(info->unalign_jit_base);
 583
 584        memset((void *)&frag, 0, sizeof(frag));
 585
 586        /* 0: X mode, Otherwise: Y mode. */
 587        if (bundle & TILEGX_BUNDLE_MODE_MASK) {
 588                unsigned int mod, opcode;
 589
 590                if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
 591                    get_RRROpcodeExtension_Y1(bundle) ==
 592                    UNARY_RRR_1_OPCODE_Y1) {
 593
 594                        opcode = get_UnaryOpcodeExtension_Y1(bundle);
 595
 596                        /*
 597                         * Test "jalr", "jalrp", "jr", "jrp" instruction at Y1
 598                         * pipeline.
 599                         */
 600                        switch (opcode) {
 601                        case JALR_UNARY_OPCODE_Y1:
 602                        case JALRP_UNARY_OPCODE_Y1:
 603                                y1_lr = true;
 604                                y1_lr_reg = 55; /* Link register. */
 605                                /* FALLTHROUGH */
 606                        case JR_UNARY_OPCODE_Y1:
 607                        case JRP_UNARY_OPCODE_Y1:
 608                                y1_br = true;
 609                                y1_br_reg = get_SrcA_Y1(bundle);
 610                                break;
 611                        case LNK_UNARY_OPCODE_Y1:
 612                                /* "lnk" at Y1 pipeline. */
 613                                y1_lr = true;
 614                                y1_lr_reg = get_Dest_Y1(bundle);
 615                                break;
 616                        }
 617                }
 618
 619                opcode = get_Opcode_Y2(bundle);
 620                mod = get_Mode(bundle);
 621
 622                /*
  623                 * bundle_2 is the bundle after turning Y2 into a dummy operation:
  624                 * ld zero, sp.
 625                 */
 626                bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();
 627
 628                /* Make Y1 as fnop if Y1 is a branch or lnk operation. */
 629                if (y1_br || y1_lr) {
 630                        bundle_2 &= ~(GX_INSN_Y1_MASK);
 631                        bundle_2 |= jit_y1_fnop();
 632                }
 633
 634                if (is_y0_y1_nop(bundle_2))
 635                        bundle_2_enable = false;
 636
 637                if (mod == MODE_OPCODE_YC2) {
 638                        /* Store. */
 639                        load_n_store = false;
 640                        load_store_size = 1 << opcode;
 641                        load_store_signed = false;
 642                        find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
 643                                  &clob3, &alias);
 644                        if (load_store_size > 8)
 645                                unexpected = true;
 646                } else {
 647                        /* Load. */
 648                        load_n_store = true;
 649                        if (mod == MODE_OPCODE_YB2) {
 650                                switch (opcode) {
 651                                case LD_OPCODE_Y2:
 652                                        load_store_signed = false;
 653                                        load_store_size = 8;
 654                                        break;
 655                                case LD4S_OPCODE_Y2:
 656                                        load_store_signed = true;
 657                                        load_store_size = 4;
 658                                        break;
 659                                case LD4U_OPCODE_Y2:
 660                                        load_store_signed = false;
 661                                        load_store_size = 4;
 662                                        break;
 663                                default:
 664                                        unexpected = true;
 665                                }
 666                        } else if (mod == MODE_OPCODE_YA2) {
 667                                if (opcode == LD2S_OPCODE_Y2) {
 668                                        load_store_signed = true;
 669                                        load_store_size = 2;
 670                                } else if (opcode == LD2U_OPCODE_Y2) {
 671                                        load_store_signed = false;
 672                                        load_store_size = 2;
 673                                } else
 674                                        unexpected = true;
 675                        } else
 676                                unexpected = true;
 677                        find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
 678                                  &clob3, &alias);
 679                }
 680        } else {
 681                unsigned int opcode;
 682
  683                /* bundle_2 is the bundle after turning X1 into a "fnop". */
 684                bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();
 685
 686                if (is_x0_x1_nop(bundle_2))
 687                        bundle_2_enable = false;
 688
 689                if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
 690                        opcode = get_UnaryOpcodeExtension_X1(bundle);
 691
 692                        if (get_RRROpcodeExtension_X1(bundle) ==
 693                            UNARY_RRR_0_OPCODE_X1) {
 694                                load_n_store = true;
 695                                find_regs(bundle, &rd, &ra, &rb, &clob1,
 696                                          &clob2, &clob3, &alias);
 697
 698                                switch (opcode) {
 699                                case LD_UNARY_OPCODE_X1:
 700                                        load_store_signed = false;
 701                                        load_store_size = 8;
 702                                        break;
 703                                case LD4S_UNARY_OPCODE_X1:
 704                                        load_store_signed = true;
 705                                        /* FALLTHROUGH */
 706                                case LD4U_UNARY_OPCODE_X1:
 707                                        load_store_size = 4;
 708                                        break;
 709
 710                                case LD2S_UNARY_OPCODE_X1:
 711                                        load_store_signed = true;
 712                                        /* FALLTHROUGH */
 713                                case LD2U_UNARY_OPCODE_X1:
 714                                        load_store_size = 2;
 715                                        break;
 716                                default:
 717                                        unexpected = true;
 718                                }
 719                        } else {
 720                                load_n_store = false;
 721                                load_store_signed = false;
 722                                find_regs(bundle, 0, &ra, &rb,
 723                                          &clob1, &clob2, &clob3,
 724                                          &alias);
 725
 726                                opcode = get_RRROpcodeExtension_X1(bundle);
 727                                switch (opcode) {
 728                                case ST_RRR_0_OPCODE_X1:
 729                                        load_store_size = 8;
 730                                        break;
 731                                case ST4_RRR_0_OPCODE_X1:
 732                                        load_store_size = 4;
 733                                        break;
 734                                case ST2_RRR_0_OPCODE_X1:
 735                                        load_store_size = 2;
 736                                        break;
 737                                default:
 738                                        unexpected = true;
 739                                }
 740                        }
 741                } else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
 742                        load_n_store = true;
 743                        opcode = get_Imm8OpcodeExtension_X1(bundle);
 744                        switch (opcode) {
 745                        case LD_ADD_IMM8_OPCODE_X1:
 746                                load_store_size = 8;
 747                                break;
 748
 749                        case LD4S_ADD_IMM8_OPCODE_X1:
 750                                load_store_signed = true;
 751                                /* FALLTHROUGH */
 752                        case LD4U_ADD_IMM8_OPCODE_X1:
 753                                load_store_size = 4;
 754                                break;
 755
 756                        case LD2S_ADD_IMM8_OPCODE_X1:
 757                                load_store_signed = true;
 758                                /* FALLTHROUGH */
 759                        case LD2U_ADD_IMM8_OPCODE_X1:
 760                                load_store_size = 2;
 761                                break;
 762
 763                        case ST_ADD_IMM8_OPCODE_X1:
 764                                load_n_store = false;
 765                                load_store_size = 8;
 766                                break;
 767                        case ST4_ADD_IMM8_OPCODE_X1:
 768                                load_n_store = false;
 769                                load_store_size = 4;
 770                                break;
 771                        case ST2_ADD_IMM8_OPCODE_X1:
 772                                load_n_store = false;
 773                                load_store_size = 2;
 774                                break;
 775                        default:
 776                                unexpected = true;
 777                        }
 778
 779                        if (!unexpected) {
 780                                x1_add = true;
 781                                if (load_n_store)
 782                                        x1_add_imm8 = get_Imm8_X1(bundle);
 783                                else
 784                                        x1_add_imm8 = get_Dest_Imm8_X1(bundle);
 785                        }
 786
 787                        find_regs(bundle, load_n_store ? (&rd) : NULL,
 788                                  &ra, &rb, &clob1, &clob2, &clob3, &alias);
 789                } else
 790                        unexpected = true;
 791        }
 792
 793        /*
  794         * Some sanity checks for the register numbers extracted from the fault bundle.
  795         */
  796        if (check_regs(rd, ra, rb, clob1, clob2, clob3))
 797                unexpected = true;
 798
 799        /* Give warning if register ra has an aligned address. */
 800        if (!unexpected)
 801                WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));
 802
 803
 804        /*
  805         * The fault came from kernel space; here we only need to take care of
  806         * the unaligned "get_user/put_user" macros defined in "uaccess.h".
  807         * Basically, we handle bundles of this form:
  808         * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
  809         * (refer to "arch/tile/include/asm/uaccess.h" for details).
  810         * For either a load or a store, the access is performed byte-wise with
  811         * get_user() or put_user(). If the macro returns a non-zero value,
  812         * that value is placed in rx, otherwise rx is set to zero. Finally pc
  813         * is made to point to the next bundle and we return.
 814         */
 815
 816        if (EX1_PL(regs->ex1) != USER_PL) {
 817
 818                unsigned long rx = 0;
 819                unsigned long x = 0, ret = 0;
 820
 821                if (y1_br || y1_lr || x1_add ||
 822                    (load_store_signed !=
 823                     (load_n_store && load_store_size == 4))) {
  824                        /* A branch, link, load/store add or wrong sign-extension is not handled here. */
 825                        unexpected = true;
 826                } else if (!unexpected) {
 827                        if (bundle & TILEGX_BUNDLE_MODE_MASK) {
 828                                /*
 829                                 * Fault bundle is Y mode.
  830                                 * Check if Y1 or Y0 is of the form
  831                                 * { movei rx, 0 } with the other a nop/fnop;
  832                                 * if so, find rx.
 833                                 */
 834
 835                                if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
 836                                    && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
 837                                    (get_Imm8_Y1(bundle) == 0) &&
 838                                    is_bundle_y0_nop(bundle)) {
 839                                        rx = get_Dest_Y1(bundle);
 840                                } else if ((get_Opcode_Y0(bundle) ==
 841                                            ADDI_OPCODE_Y0) &&
 842                                           (get_SrcA_Y0(bundle) == TREG_ZERO) &&
 843                                           (get_Imm8_Y0(bundle) == 0) &&
 844                                           is_bundle_y1_nop(bundle)) {
 845                                        rx = get_Dest_Y0(bundle);
 846                                } else {
 847                                        unexpected = true;
 848                                }
 849                        } else {
 850                                /*
 851                                 * Fault bundle is X mode.
 852                                 * Check if the X0 is 'movei rx, 0',
  853                                 * Check if X0 is 'movei rx, 0';
  854                                 * if so, find rx.
 855
 856                                if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
 857                                    && (get_Imm8OpcodeExtension_X0(bundle) ==
 858                                        ADDI_IMM8_OPCODE_X0) &&
 859                                    (get_SrcA_X0(bundle) == TREG_ZERO) &&
 860                                    (get_Imm8_X0(bundle) == 0)) {
 861                                        rx = get_Dest_X0(bundle);
 862                                } else {
 863                                        unexpected = true;
 864                                }
 865                        }
 866
 867                        /* rx should be less than 56. */
 868                        if (!unexpected && (rx >= 56))
 869                                unexpected = true;
 870                }
 871
 872                if (!search_exception_tables(regs->pc)) {
 873                        /* No fixup in the exception tables for the pc. */
 874                        unexpected = true;
 875                }
 876
 877                if (unexpected) {
 878                        /* Unexpected unalign kernel fault. */
 879                        struct task_struct *tsk = validate_current();
 880
 881                        bust_spinlocks(1);
 882
 883                        show_regs(regs);
 884
 885                        if (unlikely(tsk->pid < 2)) {
 886                                panic("Kernel unalign fault running %s!",
 887                                      tsk->pid ? "init" : "the idle task");
 888                        }
 889#ifdef SUPPORT_DIE
 890                        die("Oops", regs);
 891#endif
  892                        bust_spinlocks(0);
 893
 894                        do_group_exit(SIGKILL);
 895
 896                } else {
 897                        unsigned long i, b = 0;
 898                        unsigned char *ptr =
 899                                (unsigned char *)regs->regs[ra];
 900                        if (load_n_store) {
 901                                /* handle get_user(x, ptr) */
 902                                for (i = 0; i < load_store_size; i++) {
 903                                        ret = get_user(b, ptr++);
 904                                        if (!ret) {
 905                                                /* Success! update x. */
 906#ifdef __LITTLE_ENDIAN
 907                                                x |= (b << (8 * i));
 908#else
 909                                                x <<= 8;
 910                                                x |= b;
 911#endif /* __LITTLE_ENDIAN */
 912                                        } else {
 913                                                x = 0;
 914                                                break;
 915                                        }
 916                                }
 917
 918                                /* Sign-extend 4-byte loads. */
 919                                if (load_store_size == 4)
 920                                        x = (long)(int)x;
 921
 922                                /* Set register rd. */
 923                                regs->regs[rd] = x;
 924
 925                                /* Set register rx. */
 926                                regs->regs[rx] = ret;
 927
 928                                /* Bump pc. */
 929                                regs->pc += 8;
 930
 931                        } else {
 932                                /* Handle put_user(x, ptr) */
 933                                x = regs->regs[rb];
 934#ifdef __LITTLE_ENDIAN
 935                                b = x;
 936#else
 937                                /*
 938                                 * Swap x in order to store x from low
 939                                 * to high memory same as the
 940                                 * little-endian case.
 941                                 */
 942                                switch (load_store_size) {
 943                                case 8:
 944                                        b = swab64(x);
 945                                        break;
 946                                case 4:
 947                                        b = swab32(x);
 948                                        break;
 949                                case 2:
 950                                        b = swab16(x);
 951                                        break;
 952                                }
 953#endif /* __LITTLE_ENDIAN */
 954                                for (i = 0; i < load_store_size; i++) {
 955                                        ret = put_user(b, ptr++);
 956                                        if (ret)
 957                                                break;
 958                                        /* Success! shift 1 byte. */
 959                                        b >>= 8;
 960                                }
 961                                /* Set register rx. */
 962                                regs->regs[rx] = ret;
 963
 964                                /* Bump pc. */
 965                                regs->pc += 8;
 966                        }
 967                }
 968
 969                unaligned_fixup_count++;
 970
 971                if (unaligned_printk) {
 972                        pr_info("%s/%d - Unalign fixup for kernel access to userspace %lx\n",
 973                                current->comm, current->pid, regs->regs[ra]);
 974                }
 975
 976                /* Done! Return to the exception handler. */
 977                return;
 978        }
 979
 980        if ((align_ctl == 0) || unexpected) {
 981                siginfo_t info = {
 982                        .si_signo = SIGBUS,
 983                        .si_code = BUS_ADRALN,
 984                        .si_addr = (unsigned char __user *)0
 985                };
 986                if (unaligned_printk)
 987                        pr_info("Unalign bundle: unexp @%llx, %llx\n",
 988                                (unsigned long long)regs->pc,
 989                                (unsigned long long)bundle);
 990
 991                if (ra < 56) {
 992                        unsigned long uaa = (unsigned long)regs->regs[ra];
  993                        /* Set the bus address. */
 994                        info.si_addr = (unsigned char __user *)uaa;
 995                }
 996
 997                unaligned_fixup_count++;
 998
 999                trace_unhandled_signal("unaligned fixup trap", regs,
1000                                       (unsigned long)info.si_addr, SIGBUS);
1001                force_sig_info(info.si_signo, &info, current);
1002                return;
1003        }
1004
1005#ifdef __LITTLE_ENDIAN
1006#define UA_FIXUP_ADDR_DELTA          1
1007#define UA_FIXUP_BFEXT_START(_B_)    0
1008#define UA_FIXUP_BFEXT_END(_B_)     (8 * (_B_) - 1)
1009#else /* __BIG_ENDIAN */
1010#define UA_FIXUP_ADDR_DELTA          -1
1011#define UA_FIXUP_BFEXT_START(_B_)   (64 - 8 * (_B_))
1012#define UA_FIXUP_BFEXT_END(_B_)      63
1013#endif /* __LITTLE_ENDIAN */
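/*
 * For example, a 4-byte access on a little-endian kernel uses
 * UA_FIXUP_BFEXT_START(4) == 0 and UA_FIXUP_BFEXT_END(4) == 31, i.e. the
 * bfexts/bfextu fixup below extracts bits [31:0] of the assembled value.
 */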
1014
1015
1016
1017        if ((ra != rb) && (rd != TREG_SP) && !alias &&
1018            !y1_br && !y1_lr && !x1_add) {
1019                /*
 1020                 * Simple case: ra != rb, no register alias found,
 1021                 * and no branch or link. This covers the majority.
 1022                 * We can do a little better for this simple case than
 1023                 * the generic scheme below.
1024                 */
1025                if (!load_n_store) {
1026                        /*
 1027                         * Simple store: ra != rb, no scratch register needed.
 1028                         * Just rotate right and store one byte at a time.
1029                         */
1030#ifdef __BIG_ENDIAN
1031                        frag.insn[n++] =
1032                                jit_x0_addi(ra, ra, load_store_size - 1) |
1033                                jit_x1_fnop();
1034#endif /* __BIG_ENDIAN */
1035                        for (k = 0; k < load_store_size; k++) {
1036                                /* Store a byte. */
1037                                frag.insn[n++] =
1038                                        jit_x0_rotli(rb, rb, 56) |
1039                                        jit_x1_st1_add(ra, rb,
1040                                                       UA_FIXUP_ADDR_DELTA);
1041                        }
1042#ifdef __BIG_ENDIAN
1043                        frag.insn[n] = jit_x1_addi(ra, ra, 1);
1044#else
1045                        frag.insn[n] = jit_x1_addi(ra, ra,
1046                                                   -1 * load_store_size);
1047#endif /* __LITTLE_ENDIAN */
1048
1049                        if (load_store_size == 8) {
1050                                frag.insn[n] |= jit_x0_fnop();
1051                        } else if (load_store_size == 4) {
1052                                frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
1053                        } else { /* = 2 */
1054                                frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
1055                        }
1056                        n++;
1057                        if (bundle_2_enable)
1058                                frag.insn[n++] = bundle_2;
1059                        frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1060                } else {
1061                        if (rd == ra) {
1062                                /* Use two clobber registers: clob1/2. */
1063                                frag.insn[n++] =
1064                                        jit_x0_addi(TREG_SP, TREG_SP, -16) |
1065                                        jit_x1_fnop();
1066                                frag.insn[n++] =
1067                                        jit_x0_addi(clob1, ra, 7) |
1068                                        jit_x1_st_add(TREG_SP, clob1, -8);
1069                                frag.insn[n++] =
1070                                        jit_x0_addi(clob2, ra, 0) |
1071                                        jit_x1_st(TREG_SP, clob2);
1072                                frag.insn[n++] =
1073                                        jit_x0_fnop() |
1074                                        jit_x1_ldna(rd, ra);
1075                                frag.insn[n++] =
1076                                        jit_x0_fnop() |
1077                                        jit_x1_ldna(clob1, clob1);
1078                                /*
 1079                                 * Note: we must make sure that rd is not
 1080                                 * sp. Recover clob1/2 from the stack.
1081                                 */
1082                                frag.insn[n++] =
1083                                        jit_x0_dblalign(rd, clob1, clob2) |
1084                                        jit_x1_ld_add(clob2, TREG_SP, 8);
1085                                frag.insn[n++] =
1086                                        jit_x0_fnop() |
1087                                        jit_x1_ld_add(clob1, TREG_SP, 16);
1088                        } else {
1089                                /* Use one clobber register: clob1 only. */
1090                                frag.insn[n++] =
1091                                        jit_x0_addi(TREG_SP, TREG_SP, -16) |
1092                                        jit_x1_fnop();
1093                                frag.insn[n++] =
1094                                        jit_x0_addi(clob1, ra, 7) |
1095                                        jit_x1_st(TREG_SP, clob1);
1096                                frag.insn[n++] =
1097                                        jit_x0_fnop() |
1098                                        jit_x1_ldna(rd, ra);
1099                                frag.insn[n++] =
1100                                        jit_x0_fnop() |
1101                                        jit_x1_ldna(clob1, clob1);
1102                                /*
 1103                                 * Note: we must make sure that rd is not
 1104                                 * sp. Recover clob1 from the stack.
1105                                 */
1106                                frag.insn[n++] =
1107                                        jit_x0_dblalign(rd, clob1, ra) |
1108                                        jit_x1_ld_add(clob1, TREG_SP, 16);
1109                        }
1110
1111                        if (bundle_2_enable)
1112                                frag.insn[n++] = bundle_2;
1113                        /*
 1114                         * For a non-8-byte load, extract the corresponding bytes
 1115                         * and apply sign or zero extension.
1116                         */
1117                        if (load_store_size == 4) {
1118                                if (load_store_signed)
1119                                        frag.insn[n++] =
1120                                                jit_x0_bfexts(
1121                                                        rd, rd,
1122                                                        UA_FIXUP_BFEXT_START(4),
1123                                                        UA_FIXUP_BFEXT_END(4)) |
1124                                                jit_x1_fnop();
1125                                else
1126                                        frag.insn[n++] =
1127                                                jit_x0_bfextu(
1128                                                        rd, rd,
1129                                                        UA_FIXUP_BFEXT_START(4),
1130                                                        UA_FIXUP_BFEXT_END(4)) |
1131                                                jit_x1_fnop();
1132                        } else if (load_store_size == 2) {
1133                                if (load_store_signed)
1134                                        frag.insn[n++] =
1135                                                jit_x0_bfexts(
1136                                                        rd, rd,
1137                                                        UA_FIXUP_BFEXT_START(2),
1138                                                        UA_FIXUP_BFEXT_END(2)) |
1139                                                jit_x1_fnop();
1140                                else
1141                                        frag.insn[n++] =
1142                                                jit_x0_bfextu(
1143                                                        rd, rd,
1144                                                        UA_FIXUP_BFEXT_START(2),
1145                                                        UA_FIXUP_BFEXT_END(2)) |
1146                                                jit_x1_fnop();
1147                        }
1148
1149                        frag.insn[n++] =
1150                                jit_x0_fnop()  |
1151                                jit_x1_iret();
1152                }
1153        } else if (!load_n_store) {
1154
1155                /*
1156                 * Generic memory store cases: use 3 clobber registers.
1157                 *
 1158                 * Allocate space for saving clob2, clob1 and clob3 on the user's
 1159                 * stack. Register clob3 points to where clob2 is saved, followed
 1160                 * by clob1 and clob3 from high to low memory.
1161                 */
1162                frag.insn[n++] =
1163                        jit_x0_addi(TREG_SP, TREG_SP, -32)    |
1164                        jit_x1_fnop();
1165                frag.insn[n++] =
1166                        jit_x0_addi(clob3, TREG_SP, 16)  |
1167                        jit_x1_st_add(TREG_SP, clob3, 8);
1168#ifdef __LITTLE_ENDIAN
1169                frag.insn[n++] =
1170                        jit_x0_addi(clob1, ra, 0)   |
1171                        jit_x1_st_add(TREG_SP, clob1, 8);
1172#else
1173                frag.insn[n++] =
1174                        jit_x0_addi(clob1, ra, load_store_size - 1)   |
1175                        jit_x1_st_add(TREG_SP, clob1, 8);
1176#endif
1177                if (load_store_size == 8) {
1178                        /*
 1179                         * We store one byte at a time, not for speed but for
 1180                         * compact code. After each store the data source register
 1181                         * is rotated right by one byte, so it is unchanged after 8 stores.
1182                         */
1183                        frag.insn[n++] =
1184                                jit_x0_addi(clob2, TREG_ZERO, 7)     |
1185                                jit_x1_st_add(TREG_SP, clob2, 16);
1186                        frag.insn[n++] =
1187                                jit_x0_rotli(rb, rb, 56)      |
1188                                jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1189                        frag.insn[n++] =
1190                                jit_x0_addi(clob2, clob2, -1) |
1191                                jit_x1_bnezt(clob2, -1);
1192                        frag.insn[n++] =
1193                                jit_x0_fnop()                 |
1194                                jit_x1_addi(clob2, y1_br_reg, 0);
1195                } else if (load_store_size == 4) {
1196                        frag.insn[n++] =
1197                                jit_x0_addi(clob2, TREG_ZERO, 3)     |
1198                                jit_x1_st_add(TREG_SP, clob2, 16);
1199                        frag.insn[n++] =
1200                                jit_x0_rotli(rb, rb, 56)      |
1201                                jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
1202                        frag.insn[n++] =
1203                                jit_x0_addi(clob2, clob2, -1) |
1204                                jit_x1_bnezt(clob2, -1);
1205                        /*
 1206                         * Same as the 8-byte case, but rotate another 4
 1207                         * bytes to recover rb for the 4-byte store.
1208                         */
1209                        frag.insn[n++] = jit_x0_rotli(rb, rb, 32)      |
1210                                jit_x1_addi(clob2, y1_br_reg, 0);
1211                } else { /* =2 */
1212                        frag.insn[n++] =
1213                                jit_x0_addi(clob2, rb, 0)     |
1214                                jit_x1_st_add(TREG_SP, clob2, 16);
1215                        for (k = 0; k < 2; k++) {
1216                                frag.insn[n++] =
1217                                        jit_x0_shrui(rb, rb, 8)  |
1218                                        jit_x1_st1_add(clob1, rb,
1219                                                       UA_FIXUP_ADDR_DELTA);
1220                        }
1221                        frag.insn[n++] =
1222                                jit_x0_addi(rb, clob2, 0)       |
1223                                jit_x1_addi(clob2, y1_br_reg, 0);
1224                }
1225
1226                if (bundle_2_enable)
1227                        frag.insn[n++] = bundle_2;
1228
1229                if (y1_lr) {
1230                        frag.insn[n++] =
1231                                jit_x0_fnop()                    |
1232                                jit_x1_mfspr(y1_lr_reg,
1233                                             SPR_EX_CONTEXT_0_0);
1234                }
1235                if (y1_br) {
1236                        frag.insn[n++] =
1237                                jit_x0_fnop()                    |
1238                                jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1239                                             clob2);
1240                }
1241                if (x1_add) {
1242                        frag.insn[n++] =
1243                                jit_x0_addi(ra, ra, x1_add_imm8) |
1244                                jit_x1_ld_add(clob2, clob3, -8);
1245                } else {
1246                        frag.insn[n++] =
1247                                jit_x0_fnop()                    |
1248                                jit_x1_ld_add(clob2, clob3, -8);
1249                }
1250                frag.insn[n++] =
1251                        jit_x0_fnop()   |
1252                        jit_x1_ld_add(clob1, clob3, -8);
1253                frag.insn[n++] = jit_x0_fnop()   | jit_x1_ld(clob3, clob3);
1254                frag.insn[n++] = jit_x0_fnop()   | jit_x1_iret();
1255
1256        } else {
1257                /*
1258                 * Generic memory load cases.
1259                 *
1260                 * Allocate space for saving clob1, clob2 and clob3 on the
1261                 * user's stack. Register clob3 points to where clob1 is
1262                 * saved, followed by clob2 and clob3 from high to low memory.
1263                 */
1264
1265                frag.insn[n++] =
1266                        jit_x0_addi(TREG_SP, TREG_SP, -32) |
1267                        jit_x1_fnop();
1268                frag.insn[n++] =
1269                        jit_x0_addi(clob3, TREG_SP, 16) |
1270                        jit_x1_st_add(TREG_SP, clob3, 8);
1271                frag.insn[n++] =
1272                        jit_x0_addi(clob2, ra, 0) |
1273                        jit_x1_st_add(TREG_SP, clob2, 8);
1274
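                    /*
                     * Save clob1 on the stack; if Y1 contains a branch, also
                     * copy y1_br_reg into clob1 so it can later be written to
                     * SPR_EX_CONTEXT_0_0 as the "iret" destination.
                     */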
1275                if (y1_br) {
1276                        frag.insn[n++] =
1277                                jit_x0_addi(clob1, y1_br_reg, 0) |
1278                                jit_x1_st_add(TREG_SP, clob1, 16);
1279                } else {
1280                        frag.insn[n++] =
1281                                jit_x0_fnop() |
1282                                jit_x1_st_add(TREG_SP, clob1, 16);
1283                }
1284
1285                if (bundle_2_enable)
1286                        frag.insn[n++] = bundle_2;
1287
1288                if (y1_lr) {
1289                        frag.insn[n++] =
1290                                jit_x0_fnop()  |
1291                                jit_x1_mfspr(y1_lr_reg,
1292                                             SPR_EX_CONTEXT_0_0);
1293                }
1294
1295                if (y1_br) {
1296                        frag.insn[n++] =
1297                                jit_x0_fnop() |
1298                                jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
1299                                             clob1);
1300                }
1301
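                    /*
                     * Do the unaligned load with two aligned ldna loads that
                     * bracket the target address (clob2 = ra and clob1 = ra + 7),
                     * then merge the two halves into rd with dblalign.
                     */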
1302                frag.insn[n++] =
1303                        jit_x0_addi(clob1, clob2, 7)      |
1304                        jit_x1_ldna(rd, clob2);
1305                frag.insn[n++] =
1306                        jit_x0_fnop()                     |
1307                        jit_x1_ldna(clob1, clob1);
1308                frag.insn[n++] =
1309                        jit_x0_dblalign(rd, clob1, clob2) |
1310                        jit_x1_ld_add(clob1, clob3, -8);
1311                if (x1_add) {
1312                        frag.insn[n++] =
1313                                jit_x0_addi(ra, ra, x1_add_imm8) |
1314                                jit_x1_ld_add(clob2, clob3, -8);
1315                } else {
1316                        frag.insn[n++] =
1317                                jit_x0_fnop()  |
1318                                jit_x1_ld_add(clob2, clob3, -8);
1319                }
1320
1321                frag.insn[n++] =
1322                        jit_x0_fnop() |
1323                        jit_x1_ld(clob3, clob3);
1324
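                    /*
                     * For 4- or 2-byte loads, extract the low bytes of the
                     * value just loaded into rd, sign- or zero-extending as
                     * the original instruction requires.
                     */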
1325                if (load_store_size == 4) {
1326                        if (load_store_signed)
1327                                frag.insn[n++] =
1328                                        jit_x0_bfexts(
1329                                                rd, rd,
1330                                                UA_FIXUP_BFEXT_START(4),
1331                                                UA_FIXUP_BFEXT_END(4)) |
1332                                        jit_x1_fnop();
1333                        else
1334                                frag.insn[n++] =
1335                                        jit_x0_bfextu(
1336                                                rd, rd,
1337                                                UA_FIXUP_BFEXT_START(4),
1338                                                UA_FIXUP_BFEXT_END(4)) |
1339                                        jit_x1_fnop();
1340                } else if (load_store_size == 2) {
1341                        if (load_store_signed)
1342                                frag.insn[n++] =
1343                                        jit_x0_bfexts(
1344                                                rd, rd,
1345                                                UA_FIXUP_BFEXT_START(2),
1346                                                UA_FIXUP_BFEXT_END(2)) |
1347                                        jit_x1_fnop();
1348                        else
1349                                frag.insn[n++] =
1350                                        jit_x0_bfextu(
1351                                                rd, rd,
1352                                                UA_FIXUP_BFEXT_START(2),
1353                                                UA_FIXUP_BFEXT_END(2)) |
1354                                        jit_x1_fnop();
1355                }
1356
1357                frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
1358        }
1359
1360        /* Max JIT bundle count is 14. */
1361        WARN_ON(n > 14);
1362
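            /*
             * Install the finished fragment: each faulting PC maps to a fixed
             * slot in the user-space JIT page (indexed by the low bits of
             * pc >> 3), so copy the fragment there, flush the icache, and
             * redirect the task to run it.
             */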
1363        if (!unexpected) {
1364                int status = 0;
1365                int idx = (regs->pc >> 3) &
1366                        ((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);
1367
1368                frag.pc = regs->pc;
1369                frag.bundle = bundle;
1370
1371                if (unaligned_printk) {
1372                        pr_info("%s/%d, Unalign fixup: pc=%lx bundle=%lx %d %d %d %d %d %d %d %d\n",
1373                                current->comm, current->pid,
1374                                (unsigned long)frag.pc,
1375                                (unsigned long)frag.bundle,
1376                                (int)alias, (int)rd, (int)ra,
1377                                (int)rb, (int)bundle_2_enable,
1378                                (int)y1_lr, (int)y1_br, (int)x1_add);
1379
1380                        for (k = 0; k < n; k += 2)
1381                                pr_info("[%d] %016llx %016llx\n",
1382                                        k, (unsigned long long)frag.insn[k],
1383                                        (unsigned long long)frag.insn[k+1]);
1384                }
1385
1386                /* Swap bundle byte order for big-endian systems. */
1387#ifdef __BIG_ENDIAN
1388                frag.bundle = GX_INSN_BSWAP(frag.bundle);
1389                for (k = 0; k < n; k++)
1390                        frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
1391#endif /* __BIG_ENDIAN */
1392
1393                status = copy_to_user((void __user *)&jit_code_area[idx],
1394                                      &frag, sizeof(frag));
1395                if (status) {
1396                        /* Failed to copy the JIT into userspace; send SIGSEGV. */
1397                        siginfo_t info = {
1398                                .si_signo = SIGSEGV,
1399                                .si_code = SEGV_MAPERR,
1400                                .si_addr = (void __user *)&jit_code_area[idx]
1401                        };
1402
1403                        pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx\n",
1404                                current->pid, current->comm,
1405                                (unsigned long long)&jit_code_area[idx]);
1406
1407                        trace_unhandled_signal("segfault in unalign fixup",
1408                                               regs,
1409                                               (unsigned long)info.si_addr,
1410                                               SIGSEGV);
1411                        force_sig_info(info.si_signo, &info, current);
1412                        return;
1413                }
1414
1415
1416                /* Do a cheaper, non-atomic increment; the count may be inaccurate. */
1417                unaligned_fixup_count++;
1418                __flush_icache_range((unsigned long)&jit_code_area[idx],
1419                                     (unsigned long)&jit_code_area[idx] +
1420                                     sizeof(frag));
1421
1422                /* Set up SPR_EX_CONTEXT_0_0/1 for returning to the user program. */
1423                __insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
1424                __insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));
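                    /*
                     * The JIT runs with ICS set (see regs->ex1 below), so
                     * these values are not clobbered before the JIT's own
                     * "iret", which resumes the user program at the bundle
                     * after the faulting one.
                     */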
1425
1426                /* Point pc at the start of the new JIT fragment. */
1427                regs->pc = (unsigned long)&jit_code_area[idx].insn[0];
1428                /* Set ICS in SPR_EX_CONTEXT_K_1. */
1429                regs->ex1 = PL_ICS_EX1(USER_PL, 1);
1430        }
1431}
1432
1433
1434/*
1435 * C function to generate the unaligned-data JIT. Called from the
1436 * unaligned-data interrupt handler.
1437 *
1438 * For faults from user space: if unaligned fixup is disabled, the fault was
1439 * taken with ICS set, or the sp register points to an unaligned address,
1440 * generate a SIGBUS; otherwise map a page into user space as the JIT area
1441 * if needed, generate the JIT code via jit_bundle_gen(), and return back to
1442 * the exception handler. Kernel-space faults are fixed up or cause a panic.
1443 *
1444 * The exception handler will "iret" to the newly generated JIT code after
1445 * restoring the caller-saved registers. The JIT code will then perform
1446 * another "iret" to resume the user's program.
1447 */
1448
1449void do_unaligned(struct pt_regs *regs, int vecnum)
1450{
1451        tilegx_bundle_bits __user  *pc;
1452        tilegx_bundle_bits bundle;
1453        struct thread_info *info = current_thread_info();
1454        int align_ctl;
1455
1456        /* Check the per-process unaligned-access control (set via prctl). */
1457        align_ctl = unaligned_fixup;
1458        switch (task_thread_info(current)->align_ctl) {
1459        case PR_UNALIGN_NOPRINT:
1460                align_ctl = 1;
1461                break;
1462        case PR_UNALIGN_SIGBUS:
1463                align_ctl = 0;
1464                break;
1465        }
1466
1467        /* Enable interrupts in order to access userspace. */
1468        local_irq_enable();
1469
1470        /*
1471         * If the fault came from kernel space, there are two choices:
1472         * (a) unaligned_fixup < 1: first try the get_user/put_user exception
1473         *     fixup so the access returns -EFAULT; if there is no fixup
1474         *     entry, simply panic the kernel.
1475         * (b) unaligned_fixup >= 1: try to fix the unaligned access (e.g. one
1476         *     from the get_user/put_user() macros); panic if not fixable.
1477         */
1478
1479        if (EX1_PL(regs->ex1) != USER_PL) {
1480
1481                if (align_ctl < 1) {
1482                        unaligned_fixup_count++;
1483                        /* If exception came from kernel, try fix it up. */
1484                        if (fixup_exception(regs)) {
1485                                if (unaligned_printk)
1486                                        pr_info("Unalign fixup: %d %llx @%llx\n",
1487                                                (int)unaligned_fixup,
1488                                                (unsigned long long)regs->ex1,
1489                                                (unsigned long long)regs->pc);
1490                        } else {
1491                                /* Not fixable. Go panic. */
1492                                panic("Unalign exception in Kernel. pc=%lx",
1493                                      regs->pc);
1494                        }
1495                } else {
1496                        /*
1497                         * Try to fix the exception. If we can't, panic the
1498                         * kernel.
1499                         */
1500                        bundle = GX_INSN_BSWAP(
1501                                *((tilegx_bundle_bits *)(regs->pc)));
1502                        jit_bundle_gen(regs, bundle, align_ctl);
1503                }
1504                return;
1505        }
1506
1507        /*
1508         * If the fault came from user space with ICS set, the stack is not
1509         * aligned, or unaligned fixup is disabled, trigger SIGBUS.
1510         */
1511        if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
1512                siginfo_t info = {
1513                        .si_signo = SIGBUS,
1514                        .si_code = BUS_ADRALN,
1515                        .si_addr = (unsigned char __user *)0
1516                };
1517
1518                if (unaligned_printk)
1519                        pr_info("Unalign fixup: %d %llx @%llx\n",
1520                                (int)unaligned_fixup,
1521                                (unsigned long long)regs->ex1,
1522                                (unsigned long long)regs->pc);
1523
1524                unaligned_fixup_count++;
1525
1526                trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
1527                force_sig_info(info.si_signo, &info, current);
1528                return;
1529        }
1530
1531
1532        /* Read the bundle that caused the exception. */
1533        pc = (tilegx_bundle_bits __user *)(regs->pc);
1534        if (get_user(bundle, pc) != 0) {
1535                /* We should rarely get here, since pc is a valid user address. */
1536                siginfo_t info = {
1537                        .si_signo = SIGSEGV,
1538                        .si_code = SEGV_MAPERR,
1539                        .si_addr = (void __user *)pc
1540                };
1541                pr_err("Couldn't read instruction at %p for unaligned fixup\n", pc);
1542                trace_unhandled_signal("segfault in unalign fixup", regs,
1543                                       (unsigned long)info.si_addr, SIGSEGV);
1544                force_sig_info(info.si_signo, &info, current);
1545                return;
1546        }
1547
1548        if (!info->unalign_jit_base) {
1549                void __user *user_page;
1550
1551                /*
1552                 * Allocate a page in userland.
1553                 * For 64-bit processes we try to place the mapping far
1554                 * from anything else that might be going on (specifically
1555                 * 64 GB below the top of the user address space).  If it
1556                 * happens not to be possible to put it there, it's OK;
1557                 * the kernel will choose another location and we'll
1558                 * remember it for later.
1559                 */
1560                if (is_compat_task())
1561                        user_page = NULL;
1562                else
1563                        user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
1564                                (current->pid << PAGE_SHIFT);
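                    /*
                     * The pid-based offset above gives each process a
                     * distinct, page-aligned hint address.
                     */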
1565
1566                user_page = (void __user *) vm_mmap(NULL,
1567                                                    (unsigned long)user_page,
1568                                                    PAGE_SIZE,
1569                                                    PROT_EXEC | PROT_READ |
1570                                                    PROT_WRITE,
1571#ifdef CONFIG_HOMECACHE
1572                                                    MAP_CACHE_HOME_TASK |
1573#endif
1574                                                    MAP_PRIVATE |
1575                                                    MAP_ANONYMOUS,
1576                                                    0);
1577
1578                if (IS_ERR((void __force *)user_page)) {
1579                        pr_err("Out of kernel pages trying vm_mmap\n");
1580                        return;
1581                }
1582
1583                /* Save the address in the thread_info struct */
1584                info->unalign_jit_base = user_page;
1585                if (unaligned_printk)
1586                        pr_info("Unalign bundle: %d:%d, allocate page @%llx\n",
1587                                raw_smp_processor_id(), current->pid,
1588                                (unsigned long long)user_page);
1589        }
1590
1591        /* Generate the unaligned-access JIT. */
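            /*
             * GX_INSN_BSWAP converts the bundle read from user memory into the
             * byte order expected by the decode macros on big-endian kernels;
             * presumably a no-op on little-endian builds.
             */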
1592        jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
1593}
1594
1595#endif /* __tilegx__ */
1596