linux/arch/mips/mm/page.c
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2008  Thiemo Seufer
 * Copyright (C) 2012  MIPS Technologies, Inc.
 */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/proc_fs.h>

#include <asm/bugs.h>
#include <asm/cacheops.h>
#include <asm/cpu-type.h>
#include <asm/inst.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prefetch.h>
#include <asm/bootinfo.h>
#include <asm/mipsregs.h>
#include <asm/mmu_context.h>
#include <asm/cpu.h>
#include <asm/war.h>

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_dma.h>
#endif

#include <asm/uasm.h>

/* Registers used in the assembled routines. */
#define ZERO 0
#define AT 2
#define A0 4
#define A1 5
#define A2 6
#define T0 8
#define T1 9
#define T2 10
#define T3 11
#define T9 25
#define RA 31
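/*
 * NB: "AT" above is general-purpose register $2 ($v0 in the MIPS ABI),
 * used below as a scratch register by the R4600 V2.0 cacheop workaround;
 * it is not the assembler temporary $1.
 */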

/* Handle labels (which must be positive integers). */
enum label_id {
	label_clear_nopref = 1,
	label_clear_pref,
	label_copy_nopref,
	label_copy_pref_both,
	label_copy_pref_store,
};

UASM_L_LA(_clear_nopref)
UASM_L_LA(_clear_pref)
UASM_L_LA(_copy_nopref)
UASM_L_LA(_copy_pref_both)
UASM_L_LA(_copy_pref_store)

/* We need one branch and therefore one relocation per target label. */
static struct uasm_label labels[5];
static struct uasm_reloc relocs[5];

#define cpu_is_r4600_v1_x()	((read_c0_prid() & 0xfffffff0) == 0x00002010)
#define cpu_is_r4600_v2_x()	((read_c0_prid() & 0xfffffff0) == 0x00002020)

/*
 * MIPS R6 shrank the pref instruction's offset field to 9 bits (a signed
 * range of -256..255).  Emit the prefetch only if the offset fits;
 * otherwise skip it entirely, since a prefetch is only a hint.
 */
#define _uasm_i_pref(a, b, c, d)		\
do {						\
	if (cpu_has_mips_r6) {			\
		if (c <= 0xff && c >= -0x100)	\
			uasm_i_pref(a, b, c, d);\
	} else {				\
		uasm_i_pref(a, b, c, d);	\
	}					\
} while (0)

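/* Prefetch distances, in bytes, ahead of the current clear/copy address. */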
static int pref_bias_clear_store;
static int pref_bias_copy_load;
static int pref_bias_copy_store;

static u32 pref_src_mode;
static u32 pref_dst_mode;

static int clear_word_size;
static int copy_word_size;

static int half_clear_loop_size;
static int half_copy_loop_size;

static int cache_line_size;
#define cache_line_mask() (cache_line_size - 1)

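/*
 * Emit "add reg1, reg2, off" for an arbitrary 32-bit offset.  Offsets that
 * do not fit a signed 16-bit immediate are synthesized in T9 first.  On
 * 64-bit CPUs affected by the R4000/R4400 daddiu erratum (DADDI_WAR), the
 * immediate add is avoided altogether in favour of a register daddu.
 */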
static inline void
pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
{
	if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
		if (off > 0x7fff) {
			uasm_i_lui(buf, T9, uasm_rel_hi(off));
			uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
		} else
			uasm_i_addiu(buf, T9, ZERO, off);
		uasm_i_daddu(buf, reg1, reg2, T9);
	} else {
		if (off > 0x7fff) {
			uasm_i_lui(buf, T9, uasm_rel_hi(off));
			uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
			UASM_i_ADDU(buf, reg1, reg2, T9);
		} else
			UASM_i_ADDIU(buf, reg1, reg2, off);
	}
}

static void set_prefetch_parameters(void)
{
	if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg)
		clear_word_size = 8;
	else
		clear_word_size = 4;

	if (cpu_has_64bit_gp_regs)
		copy_word_size = 8;
	else
		copy_word_size = 4;

	/*
	 * The prefs used here use "streaming" hints, which cause the
	 * copied data to be kicked out of the cache sooner.  A page copy
	 * often ends up copying a lot more data than is commonly used, so
	 * this seems to make sense in terms of reducing cache pollution,
	 * but I've no real performance data to back this up.
	 */
	if (cpu_has_prefetch) {
		/*
		 * XXX: Most prefetch bias values in here are based on
		 * guesswork.
		 */
		cache_line_size = cpu_dcache_line_size();
		switch (current_cpu_type()) {
		case CPU_R5500:
		case CPU_TX49XX:
			/* These processors only support the Pref_Load hint. */
			pref_bias_copy_load = 256;
			break;

		case CPU_R10000:
		case CPU_R12000:
		case CPU_R14000:
		case CPU_R16000:
			/*
			 * These values have been experimentally tuned for an
			 * Origin 200.
			 */
			pref_bias_clear_store = 512;
			pref_bias_copy_load = 256;
			pref_bias_copy_store = 256;
			pref_src_mode = Pref_LoadStreamed;
			pref_dst_mode = Pref_StoreStreamed;
			break;

		case CPU_SB1:
		case CPU_SB1A:
			pref_bias_clear_store = 128;
			pref_bias_copy_load = 128;
			pref_bias_copy_store = 128;
			/*
			 * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed
			 * hints are broken.
			 */
			if (current_cpu_type() == CPU_SB1 &&
			    (current_cpu_data.processor_id & 0xff) < 0x02) {
				pref_src_mode = Pref_Load;
				pref_dst_mode = Pref_Store;
			} else {
				pref_src_mode = Pref_LoadStreamed;
				pref_dst_mode = Pref_StoreStreamed;
			}
			break;

		case CPU_LOONGSON3:
			/* Loongson-3 only supports Pref_Load/Pref_Store. */
			pref_bias_clear_store = 128;
			pref_bias_copy_load = 128;
			pref_bias_copy_store = 128;
			pref_src_mode = Pref_Load;
			pref_dst_mode = Pref_Store;
			break;

		default:
			pref_bias_clear_store = 128;
			pref_bias_copy_load = 256;
			pref_bias_copy_store = 128;
			pref_src_mode = Pref_LoadStreamed;
			if (cpu_has_mips_r6)
				/*
				 * Bit 30 (Pref_PrepareForStore) has been
				 * removed from MIPS R6.  Use bit 5
				 * (Pref_StoreStreamed) instead.
				 */
				pref_dst_mode = Pref_StoreStreamed;
			else
				pref_dst_mode = Pref_PrepareForStore;
			break;
		}
	} else {
		if (cpu_has_cache_cdex_s)
			cache_line_size = cpu_scache_line_size();
		else if (cpu_has_cache_cdex_p)
			cache_line_size = cpu_dcache_line_size();
	}
	/*
	 * Too much unrolling will overflow the available space in
	 * clear_space_array / copy_page_array.
	 */
	half_clear_loop_size = min(16 * clear_word_size,
				   max(cache_line_size >> 1,
				       4 * clear_word_size));
	half_copy_loop_size = min(16 * copy_word_size,
				  max(cache_line_size >> 1,
				      4 * copy_word_size));
}
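
/*
 * Worked example: with 8-byte words and a 32-byte D-cache line,
 * half_clear_loop_size = min(16 * 8, max(32 / 2, 4 * 8)) = 32 bytes,
 * i.e. each half of the unrolled clear loop stores four doublewords.
 */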

static void build_clear_store(u32 **buf, int off)
{
	if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {
		uasm_i_sd(buf, ZERO, off, A0);
	} else {
		uasm_i_sw(buf, ZERO, off, A0);
	}
}

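/*
 * Emit, at most once per cache line, either a prefetch-for-store for the
 * line pref_bias_clear_store bytes ahead or, on CPUs without prefetch, a
 * Create Dirty Exclusive cacheop that establishes the line without
 * fetching it from memory, including the R4600 V1.x/V2.x workarounds.
 */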
static inline void build_clear_pref(u32 **buf, int off)
{
	if (off & cache_line_mask())
		return;

	if (pref_bias_clear_store) {
		_uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
			    A0);
	} else if (cache_line_size == (half_clear_loop_size << 1)) {
		if (cpu_has_cache_cdex_s) {
			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
		} else if (cpu_has_cache_cdex_p) {
			if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
			}

			if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
				uasm_i_lw(buf, ZERO, ZERO, AT);

			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
		}
	}
}

extern u32 __clear_page_start;
extern u32 __clear_page_end;
extern u32 __copy_page_start;
extern u32 __copy_page_end;

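/*
 * Shape of the synthesized routine, as a rough sketch (assuming a 64-bit
 * CPU with prefetch; the exact unrolling and prefetch spacing follow the
 * tuning above):
 *
 *	A2 = A0 + PAGE_SIZE - pref_bias_clear_store
 *	prime up to 8 cache lines ahead with prefetches
 * clear_pref:
 *	per cache line: prefetch pref_bias_clear_store bytes ahead
 *	unrolled sd/sw of ZERO, advancing A0 by 2 * half_clear_loop_size
 *	bne A0, A2, clear_pref
 * clear_nopref:
 *	plain unrolled stores for the last pref_bias_clear_store bytes
 *	jr RA
 */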
void build_clear_page(void)
{
	int off;
	u32 *buf = &__clear_page_start;
	struct uasm_label *l = labels;
	struct uasm_reloc *r = relocs;
	int i;
	static atomic_t run_once = ATOMIC_INIT(0);

	if (atomic_xchg(&run_once, 1))
		return;

	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));

	set_prefetch_parameters();

	/*
	 * This algorithm makes the following assumptions:
	 *   - The prefetch bias is a multiple of 2 words.
	 *   - The prefetch bias is less than one page.
	 */
	BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
	BUG_ON(PAGE_SIZE < pref_bias_clear_store);

	off = PAGE_SIZE - pref_bias_clear_store;
	if (off > 0xffff || !pref_bias_clear_store)
		pg_addiu(&buf, A2, A0, off);
	else
		uasm_i_ori(&buf, A2, A0, off);

	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
		uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));

	off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
				* cache_line_size : 0;
	while (off) {
		build_clear_pref(&buf, -off);
		off -= cache_line_size;
	}
	uasm_l_clear_pref(&l, buf);
	do {
		build_clear_pref(&buf, off);
		build_clear_store(&buf, off);
		off += clear_word_size;
	} while (off < half_clear_loop_size);
	pg_addiu(&buf, A0, A0, 2 * off);
	off = -off;
	do {
		build_clear_pref(&buf, off);
		if (off == -clear_word_size)
			uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
		build_clear_store(&buf, off);
		off += clear_word_size;
	} while (off < 0);

	if (pref_bias_clear_store) {
		pg_addiu(&buf, A2, A0, pref_bias_clear_store);
		uasm_l_clear_nopref(&l, buf);
		off = 0;
		do {
			build_clear_store(&buf, off);
			off += clear_word_size;
		} while (off < half_clear_loop_size);
		pg_addiu(&buf, A0, A0, 2 * off);
		off = -off;
		do {
			if (off == -clear_word_size)
				uasm_il_bne(&buf, &r, A0, A2,
					    label_clear_nopref);
			build_clear_store(&buf, off);
			off += clear_word_size;
		} while (off < 0);
	}

	uasm_i_jr(&buf, RA);
	uasm_i_nop(&buf);

	BUG_ON(buf > &__clear_page_end);

	uasm_resolve_relocs(relocs, labels);

	pr_debug("Synthesized clear page handler (%u instructions).\n",
		 (u32)(buf - &__clear_page_start));

	pr_debug("\t.set push\n");
	pr_debug("\t.set noreorder\n");
	for (i = 0; i < (buf - &__clear_page_start); i++)
		pr_debug("\t.word 0x%08x\n", (&__clear_page_start)[i]);
	pr_debug("\t.set pop\n");
}

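/* Load/store one copy word (ld/sd on 64-bit CPUs, lw/sw otherwise). */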
static void build_copy_load(u32 **buf, int reg, int off)
{
	if (cpu_has_64bit_gp_regs) {
		uasm_i_ld(buf, reg, off, A1);
	} else {
		uasm_i_lw(buf, reg, off, A1);
	}
}

static void build_copy_store(u32 **buf, int reg, int off)
{
	if (cpu_has_64bit_gp_regs) {
		uasm_i_sd(buf, reg, off, A0);
	} else {
		uasm_i_sw(buf, reg, off, A0);
	}
}

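/*
 * Per-cache-line prefetches for the copy loops: the source (A1) side uses
 * pref_src_mode, while the destination (A0) side mirrors
 * build_clear_pref(), falling back to Create Dirty Exclusive cacheops
 * where there is no prefetch support.
 */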
static inline void build_copy_load_pref(u32 **buf, int off)
{
	if (off & cache_line_mask())
		return;

	if (pref_bias_copy_load)
		_uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
}

static inline void build_copy_store_pref(u32 **buf, int off)
{
	if (off & cache_line_mask())
		return;

	if (pref_bias_copy_store) {
		_uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
			    A0);
	} else if (cache_line_size == (half_copy_loop_size << 1)) {
		if (cpu_has_cache_cdex_s) {
			uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
		} else if (cpu_has_cache_cdex_p) {
			if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
				uasm_i_nop(buf);
			}

			if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
				uasm_i_lw(buf, ZERO, ZERO, AT);

			uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
		}
	}
}

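/*
 * The copy routine is generated in up to three phases, sketched here
 * (assuming pref_bias_copy_load >= pref_bias_copy_store, as asserted
 * below):
 *
 * copy_pref_both:	load- and store-prefetched main loop, until A0
 *			reaches A2 = A0 + PAGE_SIZE - pref_bias_copy_load
 * copy_pref_store:	store-prefetch only, for the next
 *			pref_bias_copy_load - pref_bias_copy_store bytes
 * copy_nopref:		plain loads/stores for the final
 *			pref_bias_copy_store bytes, then jr RA
 *
 * Each loop iteration copies 2 * half_copy_loop_size bytes through T0..T3.
 */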
void build_copy_page(void)
{
	int off;
	u32 *buf = &__copy_page_start;
	struct uasm_label *l = labels;
	struct uasm_reloc *r = relocs;
	int i;
	static atomic_t run_once = ATOMIC_INIT(0);

	if (atomic_xchg(&run_once, 1))
		return;

	memset(labels, 0, sizeof(labels));
	memset(relocs, 0, sizeof(relocs));

	set_prefetch_parameters();

	/*
	 * This algorithm makes the following assumptions:
	 *   - All prefetch biases are multiples of 8 words.
	 *   - The prefetch biases are less than one page.
	 *   - The store prefetch bias isn't greater than the load
	 *     prefetch bias.
	 */
	BUG_ON(pref_bias_copy_load % (8 * copy_word_size));
	BUG_ON(pref_bias_copy_store % (8 * copy_word_size));
	BUG_ON(PAGE_SIZE < pref_bias_copy_load);
	BUG_ON(pref_bias_copy_store > pref_bias_copy_load);

	off = PAGE_SIZE - pref_bias_copy_load;
	if (off > 0xffff || !pref_bias_copy_load)
		pg_addiu(&buf, A2, A0, off);
	else
		uasm_i_ori(&buf, A2, A0, off);

	if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
		uasm_i_lui(&buf, AT, uasm_rel_hi(0xa0000000));

	off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) *
				cache_line_size : 0;
	while (off) {
		build_copy_load_pref(&buf, -off);
		off -= cache_line_size;
	}
	off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size) *
				cache_line_size : 0;
	while (off) {
		build_copy_store_pref(&buf, -off);
		off -= cache_line_size;
	}
	uasm_l_copy_pref_both(&l, buf);
	do {
		build_copy_load_pref(&buf, off);
		build_copy_load(&buf, T0, off);
		build_copy_load_pref(&buf, off + copy_word_size);
		build_copy_load(&buf, T1, off + copy_word_size);
		build_copy_load_pref(&buf, off + 2 * copy_word_size);
		build_copy_load(&buf, T2, off + 2 * copy_word_size);
		build_copy_load_pref(&buf, off + 3 * copy_word_size);
		build_copy_load(&buf, T3, off + 3 * copy_word_size);
		build_copy_store_pref(&buf, off);
		build_copy_store(&buf, T0, off);
		build_copy_store_pref(&buf, off + copy_word_size);
		build_copy_store(&buf, T1, off + copy_word_size);
		build_copy_store_pref(&buf, off + 2 * copy_word_size);
		build_copy_store(&buf, T2, off + 2 * copy_word_size);
		build_copy_store_pref(&buf, off + 3 * copy_word_size);
		build_copy_store(&buf, T3, off + 3 * copy_word_size);
		off += 4 * copy_word_size;
	} while (off < half_copy_loop_size);
	pg_addiu(&buf, A1, A1, 2 * off);
	pg_addiu(&buf, A0, A0, 2 * off);
	off = -off;
	do {
		build_copy_load_pref(&buf, off);
		build_copy_load(&buf, T0, off);
		build_copy_load_pref(&buf, off + copy_word_size);
		build_copy_load(&buf, T1, off + copy_word_size);
		build_copy_load_pref(&buf, off + 2 * copy_word_size);
		build_copy_load(&buf, T2, off + 2 * copy_word_size);
		build_copy_load_pref(&buf, off + 3 * copy_word_size);
		build_copy_load(&buf, T3, off + 3 * copy_word_size);
		build_copy_store_pref(&buf, off);
		build_copy_store(&buf, T0, off);
		build_copy_store_pref(&buf, off + copy_word_size);
		build_copy_store(&buf, T1, off + copy_word_size);
		build_copy_store_pref(&buf, off + 2 * copy_word_size);
		build_copy_store(&buf, T2, off + 2 * copy_word_size);
		build_copy_store_pref(&buf, off + 3 * copy_word_size);
		if (off == -(4 * copy_word_size))
			uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both);
		build_copy_store(&buf, T3, off + 3 * copy_word_size);
		off += 4 * copy_word_size;
	} while (off < 0);

	if (pref_bias_copy_load - pref_bias_copy_store) {
		pg_addiu(&buf, A2, A0,
			 pref_bias_copy_load - pref_bias_copy_store);
		uasm_l_copy_pref_store(&l, buf);
		off = 0;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store_pref(&buf, off);
			build_copy_store(&buf, T0, off);
			build_copy_store_pref(&buf, off + copy_word_size);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store_pref(&buf, off + 2 * copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			build_copy_store_pref(&buf, off + 3 * copy_word_size);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < half_copy_loop_size);
		pg_addiu(&buf, A1, A1, 2 * off);
		pg_addiu(&buf, A0, A0, 2 * off);
		off = -off;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store_pref(&buf, off);
			build_copy_store(&buf, T0, off);
			build_copy_store_pref(&buf, off + copy_word_size);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store_pref(&buf, off + 2 * copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			build_copy_store_pref(&buf, off + 3 * copy_word_size);
			if (off == -(4 * copy_word_size))
				uasm_il_bne(&buf, &r, A2, A0,
					    label_copy_pref_store);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < 0);
	}

	if (pref_bias_copy_store) {
		pg_addiu(&buf, A2, A0, pref_bias_copy_store);
		uasm_l_copy_nopref(&l, buf);
		off = 0;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store(&buf, T0, off);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < half_copy_loop_size);
		pg_addiu(&buf, A1, A1, 2 * off);
		pg_addiu(&buf, A0, A0, 2 * off);
		off = -off;
		do {
			build_copy_load(&buf, T0, off);
			build_copy_load(&buf, T1, off + copy_word_size);
			build_copy_load(&buf, T2, off + 2 * copy_word_size);
			build_copy_load(&buf, T3, off + 3 * copy_word_size);
			build_copy_store(&buf, T0, off);
			build_copy_store(&buf, T1, off + copy_word_size);
			build_copy_store(&buf, T2, off + 2 * copy_word_size);
			if (off == -(4 * copy_word_size))
				uasm_il_bne(&buf, &r, A2, A0,
					    label_copy_nopref);
			build_copy_store(&buf, T3, off + 3 * copy_word_size);
			off += 4 * copy_word_size;
		} while (off < 0);
	}

	uasm_i_jr(&buf, RA);
	uasm_i_nop(&buf);

	BUG_ON(buf > &__copy_page_end);

	uasm_resolve_relocs(relocs, labels);

	pr_debug("Synthesized copy page handler (%u instructions).\n",
		 (u32)(buf - &__copy_page_start));

	pr_debug("\t.set push\n");
	pr_debug("\t.set noreorder\n");
	for (i = 0; i < (buf - &__copy_page_start); i++)
		pr_debug("\t.word 0x%08x\n", (&__copy_page_start)[i]);
	pr_debug("\t.set pop\n");
}

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
extern void clear_page_cpu(void *page);
extern void copy_page_cpu(void *to, void *from);

/*
 * Pad descriptors to cacheline, since each is exclusively owned by a
 * particular CPU.
 */
struct dmadscr {
	u64 dscr_a;
	u64 dscr_b;
	u64 pad_a;
	u64 pad_b;
} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS];

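/*
 * Point each data mover channel at its (single-entry) descriptor ring,
 * then reset and enable the channel.
 */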
void sb1_dma_init(void)
{
	int i;

	for (i = 0; i < DM_NUM_CHANNELS; i++) {
		const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) |
				     V_DM_DSCR_BASE_RINGSZ(1);
		void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE));

		__raw_writeq(base_val, base_reg);
		__raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg);
		__raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg);
	}
}

void clear_page(void *page)
{
	u64 to_phys = CPHYSADDR((unsigned long)page);
	unsigned int cpu = smp_processor_id();

	/* if the page is not in KSEG0, use old way */
	if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
		return clear_page_cpu(page);

	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
				 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
	page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

	/*
	 * Don't really want to do it this way, but there's no
	 * reliable way to delay completion detection.
	 */
	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
		 & M_DM_DSCR_BASE_INTERRUPT))
		;
	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

void copy_page(void *to, void *from)
{
	u64 from_phys = CPHYSADDR((unsigned long)from);
	u64 to_phys = CPHYSADDR((unsigned long)to);
	unsigned int cpu = smp_processor_id();

	/* if any page is not in KSEG0, use old way */
	if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
	    || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
		return copy_page_cpu(to, from);

	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
				 M_DM_DSCRA_INTERRUPT;
	page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

	/*
	 * Don't really want to do it this way, but there's no
	 * reliable way to delay completion detection.
	 */
	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
		 & M_DM_DSCR_BASE_INTERRUPT))
		;
	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

EXPORT_SYMBOL(clear_page);
EXPORT_SYMBOL(copy_page);

#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */