/*
 * arch/sh/mm/cache-sh4.c
 *
 * Copyright (C) 1999, 2000, 2002  Niibe Yutaka
 * Copyright (C) 2001 - 2007  Paul Mundt
 * Copyright (C) 2003  Richard Curnow
 * Copyright (c) 2007 STMicroelectronics (R&D) Ltd.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/io.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>

/*
 * The maximum number of pages we support when doing ranged dcache
 * flushing. Anything exceeding this will simply flush the dcache in its
 * entirety.
 */
#define MAX_DCACHE_PAGES        64      /* XXX: Tune for ways */
#define MAX_ICACHE_PAGES        32

static void __flush_cache_one(unsigned long addr, unsigned long phys,
                              unsigned long exec_offset);

/*
 * This is initialised here to ensure that it is not placed in the BSS.  If
 * that were to happen, note that cache_init gets called before the BSS is
 * cleared, so this would get nulled out which would be hopeless.
 */
static void (*__flush_dcache_segment_fn)(unsigned long, unsigned long) =
        (void (*)(unsigned long, unsigned long))0xdeadbeef;

/*
 * Write back the range of D-cache, and purge the I-cache.
 *
 * Called from kernel/module.c:sys_init_module, the a.out format loading
 * routine, signal handler code and kprobes code.
 */
static void __uses_jump_to_uncached sh4_flush_icache_range(void *args)
{
        struct flusher_data *data = args;
        unsigned long start, end;
        unsigned long flags, v;
        int i;

        start = data->addr1;
        end = data->addr2;

        /* If there are too many pages then just blow away the caches */
        if (((end - start) >> PAGE_SHIFT) >= MAX_ICACHE_PAGES) {
                local_flush_cache_all(NULL);
                return;
        }

        /*
         * Selectively flush d-cache then invalidate the i-cache.
         * This is inefficient, so only use this for small ranges.
         */
        start &= ~(L1_CACHE_BYTES-1);
        end += L1_CACHE_BYTES-1;
        end &= ~(L1_CACHE_BYTES-1);

        local_irq_save(flags);
        jump_to_uncached();

        for (v = start; v < end; v += L1_CACHE_BYTES) {
                unsigned long icacheaddr;
                int j, n;

                __ocbwb(v);

                icacheaddr = CACHE_IC_ADDRESS_ARRAY | (v &
                                cpu_data->icache.entry_mask);
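                /*
                 * Note: entry_mask keeps only the set-index bits of the
                 * virtual address, so icacheaddr points at the IC address
                 * array entry this line maps to in way 0; way_incr steps
                 * to the same entry in the remaining ways below.
                 */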

                /* Clear i-cache line valid-bit */
                n = boot_cpu_data.icache.n_aliases;
                for (i = 0; i < cpu_data->icache.ways; i++) {
                        for (j = 0; j < n; j++)
                                __raw_writel(0, icacheaddr + (j * PAGE_SIZE));
                        icacheaddr += cpu_data->icache.way_incr;
                }
        }

        back_to_cached();
        local_irq_restore(flags);
}

static inline void flush_cache_one(unsigned long start, unsigned long phys)
{
        unsigned long flags, exec_offset = 0;

        /*
         * All types of SH-4 require PC to be in P2 to operate on the I-cache.
         * Some types of SH-4 require PC to be in P2 to operate on the D-cache.
         */
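        /*
         * The uncached P2 mirror sits 0x20000000 above the cached P1
         * segment in the SH-4 address map, which is where the exec_offset
         * value below comes from.
         */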
        if ((boot_cpu_data.flags & CPU_HAS_P2_FLUSH_BUG) ||
            (start < CACHE_OC_ADDRESS_ARRAY))
                exec_offset = 0x20000000;

        local_irq_save(flags);
        __flush_cache_one(start | SH_CACHE_ASSOC, P1SEGADDR(phys), exec_offset);
        local_irq_restore(flags);
}

/*
 * Write back & invalidate the D-cache of the page.
 * (To avoid "alias" issues)
 */
static void sh4_flush_dcache_page(void *arg)
{
        struct page *page = arg;
#ifndef CONFIG_SMP
        struct address_space *mapping = page_mapping(page);

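        /*
         * If the page is only in the page cache and not mapped into any
         * user address space, defer the flush: mark it with PG_dcache_dirty
         * here and let it be flushed when a user mapping is actually set up.
         */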
        if (mapping && !mapping_mapped(mapping))
                set_bit(PG_dcache_dirty, &page->flags);
        else
#endif
        {
                unsigned long phys = PHYSADDR(page_address(page));
                unsigned long addr = CACHE_OC_ADDRESS_ARRAY;
                int i, n;

                /* Flush every D-cache colour the page may occupy */
                n = boot_cpu_data.dcache.n_aliases;
                for (i = 0; i < n; i++, addr += PAGE_SIZE)
                        flush_cache_one(addr, phys);
        }

        wmb();
}

/* TODO: Selective icache invalidation through IC address array.. */
static void __uses_jump_to_uncached flush_icache_all(void)
{
        unsigned long flags, ccr;

        local_irq_save(flags);
        jump_to_uncached();

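        /*
         * CCR can only be safely written while executing from the uncached
         * P2 area, which is why we are there now; setting CCR.ICI below
         * invalidates the whole I-cache in one operation.
         */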
        /* Flush I-cache */
        ccr = ctrl_inl(CCR);
        ccr |= CCR_CACHE_ICI;
        ctrl_outl(ccr, CCR);

        /*
         * back_to_cached() will take care of the barrier for us, don't add
         * another one!
         */

        back_to_cached();
        local_irq_restore(flags);
}

static inline void flush_dcache_all(void)
{
        (*__flush_dcache_segment_fn)(0UL, boot_cpu_data.dcache.way_size);
        wmb();
}

static void sh4_flush_cache_all(void *unused)
{
        flush_dcache_all();
        flush_icache_all();
}

static void __flush_cache_mm(struct mm_struct *mm, unsigned long start,
                             unsigned long end)
{
        unsigned long d = 0, p = start & PAGE_MASK;
        unsigned long alias_mask = boot_cpu_data.dcache.alias_mask;
        unsigned long n_aliases = boot_cpu_data.dcache.n_aliases;
        unsigned long select_bit;
        unsigned long all_aliases_mask;
        unsigned long addr_offset;
        pgd_t *dir;
        pmd_t *pmd;
        pud_t *pud;
        pte_t *pte;
        int i;

        dir = pgd_offset(mm, p);
        pud = pud_offset(dir, p);
        pmd = pmd_offset(pud, p);
        end = PAGE_ALIGN(end);

        all_aliases_mask = (1 << n_aliases) - 1;

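        /*
         * 'd' collects one bit per D-cache colour that has been seen to
         * alias within the range; once every colour is marked there is
         * nothing more to learn from walking the page tables.
         */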
        do {
                if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) {
                        p &= PMD_MASK;
                        p += PMD_SIZE;
                        pmd++;

                        continue;
                }

                pte = pte_offset_kernel(pmd, p);

                do {
                        unsigned long phys;
                        pte_t entry = *pte;

                        if (!(pte_val(entry) & _PAGE_PRESENT)) {
                                pte++;
                                p += PAGE_SIZE;
                                continue;
                        }

                        phys = pte_val(entry) & PTE_PHYS_MASK;

                        if ((p ^ phys) & alias_mask) {
                                d |= 1 << ((p & alias_mask) >> PAGE_SHIFT);
                                d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT);

                                if (d == all_aliases_mask)
                                        goto loop_exit;
                        }

                        pte++;
                        p += PAGE_SIZE;
                } while (p < end && ((unsigned long)pte & ~PAGE_MASK));
                pmd++;
        } while (p < end);

loop_exit:
        addr_offset = 0;
        select_bit = 1;

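        /*
         * For every colour bit set in 'd', flush a PAGE_SIZE slice of each
         * D-cache way at that colour's offset.
         */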
        for (i = 0; i < n_aliases; i++) {
                if (d & select_bit) {
                        (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE);
                        wmb();
                }

                select_bit <<= 1;
                addr_offset += PAGE_SIZE;
        }
}

/*
 * Note: (RPC) since the caches are physically tagged, the only point
 * of flush_cache_mm for SH-4 is to get rid of aliases from the
 * D-cache.  The assumption elsewhere, e.g. flush_cache_range, is that
 * lines can stay resident so long as the virtual address they were
 * accessed with (hence cache set) is in accord with the physical
 * address (i.e. tag).  It's no different here.  So I reckon we don't
 * need to flush the I-cache, since aliases don't matter for that.  We
 * should try that.
 *
 * Caller takes mm->mmap_sem.
 */
static void sh4_flush_cache_mm(void *arg)
{
        struct mm_struct *mm = arg;

        if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT)
                return;

        /*
         * If cache is only 4k-per-way, there are never any 'aliases'.  Since
         * the cache is physically tagged, the data can just be left in there.
         */
        if (boot_cpu_data.dcache.n_aliases == 0)
                return;

        /*
         * Don't bother groveling around the dcache for the VMA ranges
         * if there are too many PTEs to make it worthwhile.
         */
        if (mm->nr_ptes >= MAX_DCACHE_PAGES)
                flush_dcache_all();
        else {
                struct vm_area_struct *vma;

                /*
                 * In this case there are reasonably sized ranges to flush,
                 * iterate through the VMA list and take care of any aliases.
                 */
                for (vma = mm->mmap; vma; vma = vma->vm_next)
                        __flush_cache_mm(mm, vma->vm_start, vma->vm_end);
        }

        /* Only touch the icache if one of the VMAs has VM_EXEC set. */
        if (mm->exec_vm)
                flush_icache_all();
}

/*
 * Write back and invalidate I/D-caches for the page.
 *
 * ADDR: Virtual Address (U0 address)
 * PFN: Physical page number
 */
static void sh4_flush_cache_page(void *args)
{
        struct flusher_data *data = args;
        struct vm_area_struct *vma;
        unsigned long address, pfn, phys;
        unsigned int alias_mask;

        vma = data->vma;
        address = data->addr1;
        pfn = data->addr2;
        phys = pfn << PAGE_SHIFT;

        if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
                return;

        alias_mask = boot_cpu_data.dcache.alias_mask;

        /* We only need to flush the D-cache when there is an alias */
        if ((address ^ phys) & alias_mask) {
                /* Flush the 4K colour selected by the user virtual address */
                flush_cache_one(
                        CACHE_OC_ADDRESS_ARRAY | (address & alias_mask),
                        phys);
                /* Flush the 4K colour selected by the physical (P1) address */
                flush_cache_one(
                        CACHE_OC_ADDRESS_ARRAY | (phys & alias_mask),
                        phys);
        }

        alias_mask = boot_cpu_data.icache.alias_mask;
        if (vma->vm_flags & VM_EXEC) {
                /*
                 * Evict entries from the portion of the cache from which code
                 * may have been executed at this address (virtual).  There's
                 * no need to evict from the portion corresponding to the
                 * physical address as for the D-cache, because we know the
                 * kernel has never executed the code through its identity
                 * translation.
                 */
                flush_cache_one(
                        CACHE_IC_ADDRESS_ARRAY | (address & alias_mask),
                        phys);
        }
}

/*
 * Write back and invalidate D-caches.
 *
 * START, END: Virtual Address (U0 address)
 *
 * NOTE: We need to flush the _physical_ page entry.
 * Flushing the cache lines for U0 only isn't enough.
 * We need to flush for P1 too, which may contain aliases.
 */
static void sh4_flush_cache_range(void *args)
{
        struct flusher_data *data = args;
        struct vm_area_struct *vma;
        unsigned long start, end;

        vma = data->vma;
        start = data->addr1;
        end = data->addr2;

        if (cpu_context(smp_processor_id(), vma->vm_mm) == NO_CONTEXT)
                return;

        /*
         * If cache is only 4k-per-way, there are never any 'aliases'.  Since
         * the cache is physically tagged, the data can just be left in there.
         */
        if (boot_cpu_data.dcache.n_aliases == 0)
                return;

        /*
         * Don't bother with the lookup and alias check if we have a
         * wide range to cover, just blow away the dcache in its
         * entirety instead. -- PFM.
         */
        if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES)
                flush_dcache_all();
        else
                __flush_cache_mm(vma->vm_mm, start, end);

        if (vma->vm_flags & VM_EXEC) {
                /*
                 * TODO: Is this required???  Need to look at how I-cache
                 * coherency is assured when new programs are loaded to see if
                 * this matters.
                 */
                flush_icache_all();
        }
}

/**
 * __flush_cache_one
 *
 * @addr:  address in memory mapped cache array
 * @phys:  P1 address to flush (has to match tags if addr has 'A' bit
 *         set i.e. associative write)
 * @exec_offset: set to 0x20000000 if flush has to be executed from P2
 *               region else 0x0
 *
 * The offset into the cache array implied by 'addr' selects the
 * 'colour' of the virtual address range that will be flushed.  The
 * operation (purge/write-back) is selected by the lower 2 bits of
 * 'phys'.
 */
static void __flush_cache_one(unsigned long addr, unsigned long phys,
                              unsigned long exec_offset)
{
        int way_count;
        unsigned long base_addr = addr;
        struct cache_info *dcache;
        unsigned long way_incr;
        unsigned long a, ea, p;
        unsigned long temp_pc;

        dcache = &boot_cpu_data.dcache;
        /* Write this way for better assembly. */
        way_count = dcache->ways;
        way_incr = dcache->way_incr;

        /*
         * Apply exec_offset (i.e. branch to P2 if required).
         *
         * FIXME:
         *
         *      If I write "=r" for the (temp_pc), it puts this in r6 hence
         *      trashing exec_offset before it's been added on - why?  Hence
         *      "=&r" as a 'workaround'
         */
        asm volatile("mov.l 1f, %0\n\t"
                     "add   %1, %0\n\t"
                     "jmp   @%0\n\t"
                     "nop\n\t"
                     ".balign 4\n\t"
                     "1:  .long 2f\n\t"
                     "2:\n" : "=&r" (temp_pc) : "r" (exec_offset));

        /*
         * We know there will be >=1 iteration, so write as do-while to avoid
         * a pointless head-of-loop check for 0 iterations.
         */
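        /*
         * Each store below is an associative write to the cache address
         * array (the caller set the A bit in 'addr'): the tag of 'p' is
         * compared against the addressed set and, on a hit, the line is
         * purged, i.e. written back and invalidated, since the V and U
         * bits of 'p' are clear.
         */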
        do {
                ea = base_addr + PAGE_SIZE;
                a = base_addr;
                p = phys;

                do {
                        *(volatile unsigned long *)a = p;
                        /*
                         * Next line: intentionally not p+32, saves an add, p
                         * will do since only the cache tag bits need to
                         * match.
                         */
                        *(volatile unsigned long *)(a+32) = p;
                        a += 64;
                        p += 64;
                } while (a < ea);

                base_addr += way_incr;
        } while (--way_count != 0);
}

/*
 * Break the 1, 2 and 4 way variants of this out into separate functions to
 * avoid nearly all the overhead of having the conditional stuff in the function
 * bodies (+ the 1 and 2 way cases avoid saving any registers too).
 *
 * We want to eliminate unnecessary bus transactions, so this code uses
 * a non-obvious technique.
 *
 * Loop over a cache-way-sized block of memory, one cache line at a time. For
 * each line, use movca.l to cause the current cache line contents to be
 * written back, but without reading anything from main memory. However this
 * has the side effect that the cache is now caching that memory location. So
 * follow this with a cache invalidate to mark the cache line invalid. And do
 * all this with interrupts disabled, to avoid the cache line being accidentally
 * evicted while it is holding garbage.
 *
 * This also breaks in a number of circumstances:
 * - if there are modifications to the region of memory just above
 *   empty_zero_page (for example because a breakpoint has been placed
 *   there), then these can be lost.
 *
 *   This is because the memory address which the cache temporarily
 *   caches in the above description is empty_zero_page. So the
 *   movca.l hits the cache (it is assumed that it misses, or at least
 *   isn't dirty), modifies the line and then invalidates it, losing the
 *   required change.
 *
 * - If caches are disabled or configured in write-through mode, then
 *   the movca.l writes garbage directly into memory.
 */
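/*
 * Write-through mode is simpler: nothing can ever be dirty, so writing 0 to
 * the OC address array entry for each way just clears the valid bit, with
 * no write-back needed.
 */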
static void __flush_dcache_segment_writethrough(unsigned long start,
                                                unsigned long extent_per_way)
{
        unsigned long addr;
        int i;

        addr = CACHE_OC_ADDRESS_ARRAY | (start & cpu_data->dcache.entry_mask);

        while (extent_per_way) {
                for (i = 0; i < cpu_data->dcache.ways; i++)
                        __raw_writel(0, addr + cpu_data->dcache.way_incr * i);

                addr += cpu_data->dcache.linesz;
                extent_per_way -= cpu_data->dcache.linesz;
        }
}

static void __flush_dcache_segment_1way(unsigned long start,
                                        unsigned long extent_per_way)
{
        unsigned long orig_sr, sr_with_bl;
        unsigned long base_addr;
        unsigned long way_incr, linesz, way_size;
        struct cache_info *dcache;
        register unsigned long a0, a0e;

        asm volatile("stc sr, %0" : "=r" (orig_sr));
        sr_with_bl = orig_sr | (1<<28);
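        /*
         * Bit 28 of SR is the BL bit: with it set, interrupts and exceptions
         * are blocked, so the temporarily dirty line created by each movca.l
         * cannot be evicted before the following ocbi invalidates it.
         */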
        base_addr = ((unsigned long)&empty_zero_page[0]);

        /*
         * The previous code aligned base_addr to 16k, i.e. the way_size of all
         * existing SH-4 D-caches.  Whilst I don't see a need to have this
         * aligned to any better than the cache line size (which it will be
         * anyway by construction), let's align it to at least the way_size of
         * any existing or conceivable SH-4 D-cache.  -- RPC
         */
        base_addr = ((base_addr >> 16) << 16);
        base_addr |= start;

        dcache = &boot_cpu_data.dcache;
        linesz = dcache->linesz;
        way_incr = dcache->way_incr;
        way_size = dcache->way_size;

        a0 = base_addr;
        a0e = base_addr + extent_per_way;
        do {
                asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
                asm volatile("movca.l r0, @%0\n\t"
                             "ocbi @%0" : : "r" (a0));
                a0 += linesz;
                asm volatile("movca.l r0, @%0\n\t"
                             "ocbi @%0" : : "r" (a0));
                a0 += linesz;
                asm volatile("movca.l r0, @%0\n\t"
                             "ocbi @%0" : : "r" (a0));
                a0 += linesz;
                asm volatile("movca.l r0, @%0\n\t"
                             "ocbi @%0" : : "r" (a0));
                asm volatile("ldc %0, sr" : : "r" (orig_sr));
                a0 += linesz;
        } while (a0 < a0e);
}

static void __flush_dcache_segment_2way(unsigned long start,
                                        unsigned long extent_per_way)
{
        unsigned long orig_sr, sr_with_bl;
        unsigned long base_addr;
        unsigned long way_incr, linesz, way_size;
        struct cache_info *dcache;
        register unsigned long a0, a1, a0e;

        asm volatile("stc sr, %0" : "=r" (orig_sr));
        sr_with_bl = orig_sr | (1<<28);
        base_addr = ((unsigned long)&empty_zero_page[0]);

        /* See comment under 1-way above */
        base_addr = ((base_addr >> 16) << 16);
        base_addr |= start;

        dcache = &boot_cpu_data.dcache;
        linesz = dcache->linesz;
        way_incr = dcache->way_incr;
        way_size = dcache->way_size;

        a0 = base_addr;
        a1 = a0 + way_incr;
        a0e = base_addr + extent_per_way;
        do {
                asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
                asm volatile("movca.l r0, @%0\n\t"
                             "movca.l r0, @%1\n\t"
                             "ocbi @%0\n\t"
                             "ocbi @%1" : :
                             "r" (a0), "r" (a1));
                a0 += linesz;
                a1 += linesz;
                asm volatile("movca.l r0, @%0\n\t"
                             "movca.l r0, @%1\n\t"
                             "ocbi @%0\n\t"
                             "ocbi @%1" : :
                             "r" (a0), "r" (a1));
                a0 += linesz;
                a1 += linesz;
                asm volatile("movca.l r0, @%0\n\t"
                             "movca.l r0, @%1\n\t"
                             "ocbi @%0\n\t"
                             "ocbi @%1" : :
                             "r" (a0), "r" (a1));
                a0 += linesz;
                a1 += linesz;
                asm volatile("movca.l r0, @%0\n\t"
                             "movca.l r0, @%1\n\t"
                             "ocbi @%0\n\t"
                             "ocbi @%1" : :
                             "r" (a0), "r" (a1));
                asm volatile("ldc %0, sr" : : "r" (orig_sr));
                a0 += linesz;
                a1 += linesz;
        } while (a0 < a0e);
}

static void __flush_dcache_segment_4way(unsigned long start,
                                        unsigned long extent_per_way)
{
        unsigned long orig_sr, sr_with_bl;
        unsigned long base_addr;
        unsigned long way_incr, linesz, way_size;
        struct cache_info *dcache;
        register unsigned long a0, a1, a2, a3, a0e;

        asm volatile("stc sr, %0" : "=r" (orig_sr));
        sr_with_bl = orig_sr | (1<<28);
        base_addr = ((unsigned long)&empty_zero_page[0]);

        /* See comment under 1-way above */
        base_addr = ((base_addr >> 16) << 16);
        base_addr |= start;

        dcache = &boot_cpu_data.dcache;
        linesz = dcache->linesz;
        way_incr = dcache->way_incr;
        way_size = dcache->way_size;

        a0 = base_addr;
        a1 = a0 + way_incr;
        a2 = a1 + way_incr;
        a3 = a2 + way_incr;
        a0e = base_addr + extent_per_way;
        do {
                asm volatile("ldc %0, sr" : : "r" (sr_with_bl));
                asm volatile("movca.l r0, @%0\n\t"
                             "movca.l r0, @%1\n\t"
                             "movca.l r0, @%2\n\t"
                             "movca.l r0, @%3\n\t"
                             "ocbi @%0\n\t"
                             "ocbi @%1\n\t"
                             "ocbi @%2\n\t"
                             "ocbi @%3\n\t" : :
                             "r" (a0), "r" (a1), "r" (a2), "r" (a3));
                a0 += linesz;
                a1 += linesz;
                a2 += linesz;
                a3 += linesz;
                asm volatile("movca.l r0, @%0\n\t"
                             "movca.l r0, @%1\n\t"
                             "movca.l r0, @%2\n\t"
                             "movca.l r0, @%3\n\t"
                             "ocbi @%0\n\t"
                             "ocbi @%1\n\t"
                             "ocbi @%2\n\t"
                             "ocbi @%3\n\t" : :
                             "r" (a0), "r" (a1), "r" (a2), "r" (a3));
                a0 += linesz;
                a1 += linesz;
                a2 += linesz;
                a3 += linesz;
                asm volatile("movca.l r0, @%0\n\t"
                             "movca.l r0, @%1\n\t"
                             "movca.l r0, @%2\n\t"
                             "movca.l r0, @%3\n\t"
                             "ocbi @%0\n\t"
                             "ocbi @%1\n\t"
                             "ocbi @%2\n\t"
                             "ocbi @%3\n\t" : :
                             "r" (a0), "r" (a1), "r" (a2), "r" (a3));
                a0 += linesz;
                a1 += linesz;
                a2 += linesz;
                a3 += linesz;
                asm volatile("movca.l r0, @%0\n\t"
                             "movca.l r0, @%1\n\t"
                             "movca.l r0, @%2\n\t"
                             "movca.l r0, @%3\n\t"
                             "ocbi @%0\n\t"
                             "ocbi @%1\n\t"
                             "ocbi @%2\n\t"
                             "ocbi @%3\n\t" : :
                             "r" (a0), "r" (a1), "r" (a2), "r" (a3));
                asm volatile("ldc %0, sr" : : "r" (orig_sr));
                a0 += linesz;
                a1 += linesz;
                a2 += linesz;
                a3 += linesz;
        } while (a0 < a0e);
}

extern void __weak sh4__flush_region_init(void);

/*
 * SH-4 has virtually indexed and physically tagged cache.
 */
void __init sh4_cache_init(void)
{
        unsigned int wt_enabled = !!(__raw_readl(CCR) & CCR_CACHE_WT);
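        /*
         * CCR_CACHE_WT set means the D-cache is running in write-through
         * mode, in which case the movca.l based flushers must not be used
         * (see the comment above __flush_dcache_segment_writethrough).
         */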

        printk("PVR=%08x CVR=%08x PRR=%08x\n",
                ctrl_inl(CCN_PVR),
                ctrl_inl(CCN_CVR),
                ctrl_inl(CCN_PRR));

        if (wt_enabled)
                __flush_dcache_segment_fn = __flush_dcache_segment_writethrough;
        else {
                switch (boot_cpu_data.dcache.ways) {
                case 1:
                        __flush_dcache_segment_fn = __flush_dcache_segment_1way;
                        break;
                case 2:
                        __flush_dcache_segment_fn = __flush_dcache_segment_2way;
                        break;
                case 4:
                        __flush_dcache_segment_fn = __flush_dcache_segment_4way;
                        break;
                default:
                        panic("unknown number of cache ways\n");
                        break;
                }
        }

        local_flush_icache_range        = sh4_flush_icache_range;
        local_flush_dcache_page         = sh4_flush_dcache_page;
        local_flush_cache_all           = sh4_flush_cache_all;
        local_flush_cache_mm            = sh4_flush_cache_mm;
        local_flush_cache_dup_mm        = sh4_flush_cache_mm;
        local_flush_cache_page          = sh4_flush_cache_page;
        local_flush_cache_range         = sh4_flush_cache_range;

        sh4__flush_region_init();
}