linux/arch/mips/mm/page.c
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
 * Copyright (C) 2007  Maciej W. Rozycki
 * Copyright (C) 2008  Thiemo Seufer
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/proc_fs.h>

#include <asm/bugs.h>
#include <asm/cacheops.h>
#include <asm/inst.h>
#include <asm/io.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/prefetch.h>
#include <asm/system.h>
#include <asm/bootinfo.h>
#include <asm/mipsregs.h>
#include <asm/mmu_context.h>
#include <asm/cpu.h>
#include <asm/war.h>

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_dma.h>
#endif

#include "uasm.h"

/* Registers used in the assembled routines. */
#define ZERO 0
#define AT 2
#define A0 4
#define A1 5
#define A2 6
#define T0 8
#define T1 9
#define T2 10
#define T3 11
#define T9 25
#define RA 31
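
/*
 * These are MIPS GPR numbers as encoded in instructions: 4..6 are the
 * argument registers $a0..$a2, 8..11 the temporaries $t0..$t3, 25 is $t9
 * and 31 is $ra.  Note that AT is defined as GPR 2 ($v0) here rather
 * than the assembler temporary $1; it is only used as a scratch pointer
 * for the R4600 V2.0 cacheop workaround below.
 */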

/* Handle labels (which must be positive integers). */
enum label_id {
        label_clear_nopref = 1,
        label_clear_pref,
        label_copy_nopref,
        label_copy_pref_both,
        label_copy_pref_store,
};

UASM_L_LA(_clear_nopref)
UASM_L_LA(_clear_pref)
UASM_L_LA(_copy_nopref)
UASM_L_LA(_copy_pref_both)
UASM_L_LA(_copy_pref_store)

/* We need one branch and therefore one relocation per target label. */
static struct uasm_label __cpuinitdata labels[5];
static struct uasm_reloc __cpuinitdata relocs[5];
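
/*
 * uasm resolves branches in two passes: each uasm_il_bne() below records
 * a relocation against one of the label ids above, each uasm_l_*() call
 * records where that label ended up in the buffer, and
 * uasm_resolve_relocs() finally patches the branch offsets.
 */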

#define cpu_is_r4600_v1_x()     ((read_c0_prid() & 0xfffffff0) == 0x00002010)
#define cpu_is_r4600_v2_x()     ((read_c0_prid() & 0xfffffff0) == 0x00002020)

/*
 * Maximum sizes:
 *
 * R4000 128 bytes S-cache:             0x058 bytes
 * R4600 v1.7:                          0x05c bytes
 * R4600 v2.0:                          0x060 bytes
 * With prefetching, 16 word strides    0x120 bytes
 */

static u32 clear_page_array[0x120 / 4];

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
void clear_page_cpu(void *page) __attribute__((alias("clear_page_array")));
#else
void clear_page(void *page) __attribute__((alias("clear_page_array")));
#endif
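
/*
 * The alias turns the (otherwise data) buffer into a callable function:
 * build_clear_page() synthesizes the actual instructions into
 * clear_page_array at boot time.  The same trick is used for
 * copy_page_array below.
 */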

EXPORT_SYMBOL(clear_page);

/*
 * Maximum sizes:
 *
 * R4000 128 bytes S-cache:             0x11c bytes
 * R4600 v1.7:                          0x080 bytes
 * R4600 v2.0:                          0x07c bytes
 * With prefetching, 16 word strides    0x540 bytes
 */
static u32 copy_page_array[0x540 / 4];

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
void
copy_page_cpu(void *to, void *from) __attribute__((alias("copy_page_array")));
#else
void copy_page(void *to, void *from) __attribute__((alias("copy_page_array")));
#endif

EXPORT_SYMBOL(copy_page);


static int pref_bias_clear_store __cpuinitdata;
static int pref_bias_copy_load __cpuinitdata;
static int pref_bias_copy_store __cpuinitdata;

static u32 pref_src_mode __cpuinitdata;
static u32 pref_dst_mode __cpuinitdata;

static int clear_word_size __cpuinitdata;
static int copy_word_size __cpuinitdata;

static int half_clear_loop_size __cpuinitdata;
static int half_copy_loop_size __cpuinitdata;

static int cache_line_size __cpuinitdata;
#define cache_line_mask() (cache_line_size - 1)

static inline void __cpuinit
pg_addiu(u32 **buf, unsigned int reg1, unsigned int reg2, unsigned int off)
{
        if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
                if (off > 0x7fff) {
                        uasm_i_lui(buf, T9, uasm_rel_hi(off));
                        uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
                } else
                        uasm_i_addiu(buf, T9, ZERO, off);
                uasm_i_daddu(buf, reg1, reg2, T9);
        } else {
                if (off > 0x7fff) {
                        uasm_i_lui(buf, T9, uasm_rel_hi(off));
                        uasm_i_addiu(buf, T9, T9, uasm_rel_lo(off));
                        UASM_i_ADDU(buf, reg1, reg2, T9);
                } else
                        UASM_i_ADDIU(buf, reg1, reg2, off);
        }
}
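
/*
 * Illustrative only: on a 64-bit CPU with the DADDI erratum workaround
 * enabled and off = 0x8000, pg_addiu() above emits the equivalent of
 *
 *      lui     t9, 1           # uasm_rel_hi(0x8000)
 *      addiu   t9, t9, -32768  # uasm_rel_lo(0x8000)
 *      daddu   reg1, reg2, t9
 *
 * avoiding the erratum-prone daddiu instruction entirely on that path.
 */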

static void __cpuinit set_prefetch_parameters(void)
{
        if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg)
                clear_word_size = 8;
        else
                clear_word_size = 4;

        if (cpu_has_64bit_gp_regs)
                copy_word_size = 8;
        else
                copy_word_size = 4;

        /*
         * The prefs used here employ "streaming" hints, which cause the
         * copied data to be evicted from the cache sooner.  A page copy
         * often ends up copying a lot more data than is commonly used, so
         * this seems to make sense in terms of reducing cache pollution,
         * but I've no real performance data to back this up.
         */
        if (cpu_has_prefetch) {
                /*
                 * XXX: Most prefetch bias values in here are based on
                 * guesswork.
                 */
                cache_line_size = cpu_dcache_line_size();
                switch (current_cpu_type()) {
                case CPU_R5500:
                case CPU_TX49XX:
                        /* These processors only support the Pref_Load hint. */
                        pref_bias_copy_load = 256;
                        break;

                case CPU_RM9000:
                        /*
                         * As a workaround for erratum G105, which makes the
                         * PrepareForStore hint unusable, we fall back to
                         * StoreRetained on the RM9000.  Once it is known
                         * which versions of the RM9000 are affected, we'll
                         * be able to conditionalize this.
                         */

                case CPU_R10000:
                case CPU_R12000:
                case CPU_R14000:
                        /*
                         * Those values have been experimentally tuned for an
                         * Origin 200.
                         */
                        pref_bias_clear_store = 512;
                        pref_bias_copy_load = 256;
                        pref_bias_copy_store = 256;
                        pref_src_mode = Pref_LoadStreamed;
                        pref_dst_mode = Pref_StoreStreamed;
                        break;

                case CPU_SB1:
                case CPU_SB1A:
                        pref_bias_clear_store = 128;
                        pref_bias_copy_load = 128;
                        pref_bias_copy_store = 128;
                        /*
                         * SB1 pass1 Pref_LoadStreamed/Pref_StoreStreamed
                         * hints are broken.
                         */
                        if (current_cpu_type() == CPU_SB1 &&
                            (current_cpu_data.processor_id & 0xff) < 0x02) {
                                pref_src_mode = Pref_Load;
                                pref_dst_mode = Pref_Store;
                        } else {
                                pref_src_mode = Pref_LoadStreamed;
                                pref_dst_mode = Pref_StoreStreamed;
                        }
                        break;

                default:
                        pref_bias_clear_store = 128;
                        pref_bias_copy_load = 256;
                        pref_bias_copy_store = 128;
                        pref_src_mode = Pref_LoadStreamed;
                        pref_dst_mode = Pref_PrepareForStore;
                        break;
                }
        } else {
                if (cpu_has_cache_cdex_s)
                        cache_line_size = cpu_scache_line_size();
                else if (cpu_has_cache_cdex_p)
                        cache_line_size = cpu_dcache_line_size();
        }
        /*
         * Too much unrolling will overflow the available space in
         * clear_page_array / copy_page_array.
         */
        half_clear_loop_size = min(16 * clear_word_size,
                                   max(cache_line_size >> 1,
                                       4 * clear_word_size));
        half_copy_loop_size = min(16 * copy_word_size,
                                  max(cache_line_size >> 1,
                                      4 * copy_word_size));
}
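
/*
 * Worked example: with 64-bit registers (clear_word_size == 8) and a
 * 32-byte D-cache line, half_clear_loop_size == min(128, max(16, 32))
 * == 32 bytes, i.e. four stores per half of the unrolled clear loop.
 */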

static void __cpuinit build_clear_store(u32 **buf, int off)
{
        if (cpu_has_64bit_gp_regs || cpu_has_64bit_zero_reg) {
                uasm_i_sd(buf, ZERO, off, A0);
        } else {
                uasm_i_sw(buf, ZERO, off, A0);
        }
}

static inline void __cpuinit build_clear_pref(u32 **buf, int off)
{
        if (off & cache_line_mask())
                return;

        if (pref_bias_clear_store) {
                uasm_i_pref(buf, pref_dst_mode, pref_bias_clear_store + off,
                            A0);
        } else if (cache_line_size == (half_clear_loop_size << 1)) {
                if (cpu_has_cache_cdex_s) {
                        uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
                } else if (cpu_has_cache_cdex_p) {
                        if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                        }

                        if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                                uasm_i_lw(buf, ZERO, ZERO, AT);

                        uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
                }
        }
}
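
/*
 * On CPUs without usable prefetch, the Create_Dirty_Excl cacheops above
 * establish each line in the dirty-exclusive state without fetching it
 * from memory first.  That is only safe because the unrolled loop then
 * overwrites the whole line, which the cache_line_size ==
 * (half_clear_loop_size << 1) check guarantees.
 */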

void __cpuinit build_clear_page(void)
{
        int off;
        u32 *buf = (u32 *)&clear_page_array;
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
        int i;

        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));

        set_prefetch_parameters();

        /*
         * This algorithm makes the following assumptions:
         *   - The prefetch bias is a multiple of 2 words.
         *   - The prefetch bias is less than one page.
         */
        BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
        BUG_ON(PAGE_SIZE < pref_bias_clear_store);

        off = PAGE_SIZE - pref_bias_clear_store;
        if (off > 0xffff || !pref_bias_clear_store)
                pg_addiu(&buf, A2, A0, off);
        else
                uasm_i_ori(&buf, A2, A0, off);

        if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                uasm_i_lui(&buf, AT, 0xa000);

        off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
                                * cache_line_size : 0;
        while (off) {
                build_clear_pref(&buf, -off);
                off -= cache_line_size;
        }
        uasm_l_clear_pref(&l, buf);
        do {
                build_clear_pref(&buf, off);
                build_clear_store(&buf, off);
                off += clear_word_size;
        } while (off < half_clear_loop_size);
        pg_addiu(&buf, A0, A0, 2 * off);
        off = -off;
        do {
                build_clear_pref(&buf, off);
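                /*
                 * The branch is emitted one store early so that the
                 * final store ends up in the bne delay slot.
                 */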
                if (off == -clear_word_size)
                        uasm_il_bne(&buf, &r, A0, A2, label_clear_pref);
                build_clear_store(&buf, off);
                off += clear_word_size;
        } while (off < 0);

        if (pref_bias_clear_store) {
                pg_addiu(&buf, A2, A0, pref_bias_clear_store);
                uasm_l_clear_nopref(&l, buf);
                off = 0;
                do {
                        build_clear_store(&buf, off);
                        off += clear_word_size;
                } while (off < half_clear_loop_size);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
                        if (off == -clear_word_size)
                                uasm_il_bne(&buf, &r, A0, A2,
                                            label_clear_nopref);
                        build_clear_store(&buf, off);
                        off += clear_word_size;
                } while (off < 0);
        }

        uasm_i_jr(&buf, RA);
        uasm_i_nop(&buf);

        BUG_ON(buf > clear_page_array + ARRAY_SIZE(clear_page_array));

        uasm_resolve_relocs(relocs, labels);

        pr_debug("Synthesized clear page handler (%u instructions).\n",
                 (u32)(buf - clear_page_array));

        pr_debug("\t.set push\n");
        pr_debug("\t.set noreorder\n");
        for (i = 0; i < (buf - clear_page_array); i++)
                pr_debug("\t.word 0x%08x\n", clear_page_array[i]);
        pr_debug("\t.set pop\n");
}
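
/*
 * Sketch of what gets synthesized for a no-prefetch 64-bit CPU with a
 * 32-byte half loop (illustrative; the cacheop and R4600 workaround
 * variants are omitted):
 *
 *      daddiu  a2, a0, PAGE_SIZE
 * 1:   sd      zero, 0(a0)
 *      sd      zero, 8(a0)
 *      sd      zero, 16(a0)
 *      sd      zero, 24(a0)
 *      daddiu  a0, a0, 64
 *      sd      zero, -32(a0)
 *      sd      zero, -24(a0)
 *      sd      zero, -16(a0)
 *      bne     a0, a2, 1b
 *       sd     zero, -8(a0)
 *      jr      ra
 *       nop
 */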

static void __cpuinit build_copy_load(u32 **buf, int reg, int off)
{
        if (cpu_has_64bit_gp_regs) {
                uasm_i_ld(buf, reg, off, A1);
        } else {
                uasm_i_lw(buf, reg, off, A1);
        }
}

static void __cpuinit build_copy_store(u32 **buf, int reg, int off)
{
        if (cpu_has_64bit_gp_regs) {
                uasm_i_sd(buf, reg, off, A0);
        } else {
                uasm_i_sw(buf, reg, off, A0);
        }
}

static inline void build_copy_load_pref(u32 **buf, int off)
{
        if (off & cache_line_mask())
                return;

        if (pref_bias_copy_load)
                uasm_i_pref(buf, pref_src_mode, pref_bias_copy_load + off, A1);
}

static inline void build_copy_store_pref(u32 **buf, int off)
{
        if (off & cache_line_mask())
                return;

        if (pref_bias_copy_store) {
                uasm_i_pref(buf, pref_dst_mode, pref_bias_copy_store + off,
                            A0);
        } else if (cache_line_size == (half_copy_loop_size << 1)) {
                if (cpu_has_cache_cdex_s) {
                        uasm_i_cache(buf, Create_Dirty_Excl_SD, off, A0);
                } else if (cpu_has_cache_cdex_p) {
                        if (R4600_V1_HIT_CACHEOP_WAR && cpu_is_r4600_v1_x()) {
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                                uasm_i_nop(buf);
                        }

                        if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                                uasm_i_lw(buf, ZERO, ZERO, AT);

                        uasm_i_cache(buf, Create_Dirty_Excl_D, off, A0);
                }
        }
}

void __cpuinit build_copy_page(void)
{
        int off;
        u32 *buf = (u32 *)&copy_page_array;
        struct uasm_label *l = labels;
        struct uasm_reloc *r = relocs;
        int i;

        memset(labels, 0, sizeof(labels));
        memset(relocs, 0, sizeof(relocs));

        set_prefetch_parameters();

        /*
         * This algorithm makes the following assumptions:
         *   - All prefetch biases are multiples of 8 words.
         *   - The prefetch biases are less than one page.
         *   - The store prefetch bias isn't greater than the load
         *     prefetch bias.
         */
        BUG_ON(pref_bias_copy_load % (8 * copy_word_size));
        BUG_ON(pref_bias_copy_store % (8 * copy_word_size));
        BUG_ON(PAGE_SIZE < pref_bias_copy_load);
        BUG_ON(pref_bias_copy_store > pref_bias_copy_load);

        off = PAGE_SIZE - pref_bias_copy_load;
        if (off > 0xffff || !pref_bias_copy_load)
                pg_addiu(&buf, A2, A0, off);
        else
                uasm_i_ori(&buf, A2, A0, off);

        if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x())
                uasm_i_lui(&buf, AT, 0xa000);

        off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) *
                                cache_line_size : 0;
        while (off) {
                build_copy_load_pref(&buf, -off);
                off -= cache_line_size;
        }
        off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size) *
                                cache_line_size : 0;
        while (off) {
                build_copy_store_pref(&buf, -off);
                off -= cache_line_size;
        }
        uasm_l_copy_pref_both(&l, buf);
        do {
                build_copy_load_pref(&buf, off);
                build_copy_load(&buf, T0, off);
                build_copy_load_pref(&buf, off + copy_word_size);
                build_copy_load(&buf, T1, off + copy_word_size);
                build_copy_load_pref(&buf, off + 2 * copy_word_size);
                build_copy_load(&buf, T2, off + 2 * copy_word_size);
                build_copy_load_pref(&buf, off + 3 * copy_word_size);
                build_copy_load(&buf, T3, off + 3 * copy_word_size);
                build_copy_store_pref(&buf, off);
                build_copy_store(&buf, T0, off);
                build_copy_store_pref(&buf, off + copy_word_size);
                build_copy_store(&buf, T1, off + copy_word_size);
                build_copy_store_pref(&buf, off + 2 * copy_word_size);
                build_copy_store(&buf, T2, off + 2 * copy_word_size);
                build_copy_store_pref(&buf, off + 3 * copy_word_size);
                build_copy_store(&buf, T3, off + 3 * copy_word_size);
                off += 4 * copy_word_size;
        } while (off < half_copy_loop_size);
        pg_addiu(&buf, A1, A1, 2 * off);
        pg_addiu(&buf, A0, A0, 2 * off);
        off = -off;
        do {
                build_copy_load_pref(&buf, off);
                build_copy_load(&buf, T0, off);
                build_copy_load_pref(&buf, off + copy_word_size);
                build_copy_load(&buf, T1, off + copy_word_size);
                build_copy_load_pref(&buf, off + 2 * copy_word_size);
                build_copy_load(&buf, T2, off + 2 * copy_word_size);
                build_copy_load_pref(&buf, off + 3 * copy_word_size);
                build_copy_load(&buf, T3, off + 3 * copy_word_size);
                build_copy_store_pref(&buf, off);
                build_copy_store(&buf, T0, off);
                build_copy_store_pref(&buf, off + copy_word_size);
                build_copy_store(&buf, T1, off + copy_word_size);
                build_copy_store_pref(&buf, off + 2 * copy_word_size);
                build_copy_store(&buf, T2, off + 2 * copy_word_size);
                build_copy_store_pref(&buf, off + 3 * copy_word_size);
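                /*
                 * As in build_clear_page(), the loop branch is placed
                 * before the last store so that store fills the delay
                 * slot; the same trick recurs in the loops below.
                 */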
                if (off == -(4 * copy_word_size))
                        uasm_il_bne(&buf, &r, A2, A0, label_copy_pref_both);
                build_copy_store(&buf, T3, off + 3 * copy_word_size);
                off += 4 * copy_word_size;
        } while (off < 0);

        if (pref_bias_copy_load - pref_bias_copy_store) {
                pg_addiu(&buf, A2, A0,
                         pref_bias_copy_load - pref_bias_copy_store);
                uasm_l_copy_pref_store(&l, buf);
                off = 0;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store_pref(&buf, off);
                        build_copy_store(&buf, T0, off);
                        build_copy_store_pref(&buf, off + copy_word_size);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store_pref(&buf, off + 2 * copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        build_copy_store_pref(&buf, off + 3 * copy_word_size);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < half_copy_loop_size);
                pg_addiu(&buf, A1, A1, 2 * off);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store_pref(&buf, off);
                        build_copy_store(&buf, T0, off);
                        build_copy_store_pref(&buf, off + copy_word_size);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store_pref(&buf, off + 2 * copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        build_copy_store_pref(&buf, off + 3 * copy_word_size);
                        if (off == -(4 * copy_word_size))
                                uasm_il_bne(&buf, &r, A2, A0,
                                            label_copy_pref_store);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < 0);
        }

        if (pref_bias_copy_store) {
                pg_addiu(&buf, A2, A0, pref_bias_copy_store);
                uasm_l_copy_nopref(&l, buf);
                off = 0;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store(&buf, T0, off);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < half_copy_loop_size);
                pg_addiu(&buf, A1, A1, 2 * off);
                pg_addiu(&buf, A0, A0, 2 * off);
                off = -off;
                do {
                        build_copy_load(&buf, T0, off);
                        build_copy_load(&buf, T1, off + copy_word_size);
                        build_copy_load(&buf, T2, off + 2 * copy_word_size);
                        build_copy_load(&buf, T3, off + 3 * copy_word_size);
                        build_copy_store(&buf, T0, off);
                        build_copy_store(&buf, T1, off + copy_word_size);
                        build_copy_store(&buf, T2, off + 2 * copy_word_size);
                        if (off == -(4 * copy_word_size))
                                uasm_il_bne(&buf, &r, A2, A0,
                                            label_copy_nopref);
                        build_copy_store(&buf, T3, off + 3 * copy_word_size);
                        off += 4 * copy_word_size;
                } while (off < 0);
        }

        uasm_i_jr(&buf, RA);
        uasm_i_nop(&buf);

        BUG_ON(buf > copy_page_array + ARRAY_SIZE(copy_page_array));

        uasm_resolve_relocs(relocs, labels);

        pr_debug("Synthesized copy page handler (%u instructions).\n",
                 (u32)(buf - copy_page_array));

        pr_debug("\t.set push\n");
        pr_debug("\t.set noreorder\n");
        for (i = 0; i < (buf - copy_page_array); i++)
                pr_debug("\t.word 0x%08x\n", copy_page_array[i]);
        pr_debug("\t.set pop\n");
}

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS

/*
 * Pad descriptors to cacheline, since each is exclusively owned by a
 * particular CPU.
 */
struct dmadscr {
        u64 dscr_a;
        u64 dscr_b;
        u64 pad_a;
        u64 pad_b;
} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS];
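
/*
 * Each descriptor is one 32-byte ring entry; sb1_dma_init() below
 * programs every data mover channel with a ring of exactly one such
 * descriptor (V_DM_DSCR_BASE_RINGSZ(1)), and each CPU only ever touches
 * the channel matching its CPU number.
 */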

void sb1_dma_init(void)
{
        int i;

        for (i = 0; i < DM_NUM_CHANNELS; i++) {
                const u64 base_val = CPHYSADDR((unsigned long)&page_descr[i]) |
                                     V_DM_DSCR_BASE_RINGSZ(1);
                void *base_reg = IOADDR(A_DM_REGISTER(i, R_DM_DSCR_BASE));

                __raw_writeq(base_val, base_reg);
                __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, base_reg);
                __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, base_reg);
        }
}

void clear_page(void *page)
{
        u64 to_phys = CPHYSADDR((unsigned long)page);
        unsigned int cpu = smp_processor_id();

        /* If the page is not in KSEG0, fall back to the CPU routine. */
        if ((long)KSEGX((unsigned long)page) != (long)CKSEG0)
                return clear_page_cpu(page);

        page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_ZERO_MEM |
                                 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
        page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
        __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

        /*
         * Don't really want to do it this way, but there's no
         * reliable way to delay completion detection.
         */
        while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
                 & M_DM_DSCR_BASE_INTERRUPT))
                ;
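        /* Reading DSCR_BASE back appears to acknowledge the completion flag. */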
        __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

void copy_page(void *to, void *from)
{
        u64 from_phys = CPHYSADDR((unsigned long)from);
        u64 to_phys = CPHYSADDR((unsigned long)to);
        unsigned int cpu = smp_processor_id();

        /* If either page is not in KSEG0, fall back to the CPU routine. */
        if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
            || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
                return copy_page_cpu(to, from);

        page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
                                 M_DM_DSCRA_INTERRUPT;
        page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
        __raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

        /*
         * Don't really want to do it this way, but there's no
         * reliable way to delay completion detection.
         */
        while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
                 & M_DM_DSCR_BASE_INTERRUPT))
                ;
        __raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */