/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * arch/sh64/mm/cache.c
 *
 * Original version Copyright (C) 2000, 2001  Paolo Alberelli
 * Second version Copyright (C) benedict.gaster@superh.com 2002
 * Third version Copyright Richard.Curnow@superh.com 2003
 * Hacks to third version Copyright (C) 2003 Paul Mundt
 */

/****************************************************************************/

#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/threads.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/tlb.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h> /* for flush_itlb_range */

#include <linux/proc_fs.h>

/* This function is in entry.S */
extern unsigned long switch_and_save_asid(unsigned long new_asid);

/* Wired TLB entry for the D-cache */
static unsigned long long dtlb_cache_slot;

/**
 * sh64_cache_init()
 *
 * This is pretty much just a straightforward clone of the SH
 * detect_cpu_and_cache_system().
 *
 * This function is responsible for setting up all of the cache
 * info dynamically as well as taking care of CPU probing and
 * setting up the relevant subtype data.
 *
 * FIXME: For the time being, we only really support the SH5-101
 * out of the box, and don't support dynamic probing for things
 * like the SH5-103 or even cut2 of the SH5-101. Implement this
 * later!
 */
int __init sh64_cache_init(void)
{
	/*
	 * First, setup some sane values for the I-cache.
	 */
	cpu_data->icache.ways		= 4;
	cpu_data->icache.sets		= 256;
	cpu_data->icache.linesz		= L1_CACHE_BYTES;

	/*
	 * FIXME: This can probably be cleaned up a bit as well.. for example,
	 * do we really need the way shift _and_ the way_step_shift ?? Judging
	 * by the existing code, I would guess no.. is there any valid reason
	 * why we need to be tracking this around?
	 */
	cpu_data->icache.way_shift	= 13;
	cpu_data->icache.entry_shift	= 5;
	cpu_data->icache.set_shift	= 4;
	cpu_data->icache.way_step_shift	= 16;
	cpu_data->icache.asid_shift	= 2;

	/*
	 * way offset = cache size / associativity, so just don't factor in
	 * associativity in the first place..
	 */
	cpu_data->icache.way_ofs	= cpu_data->icache.sets *
					  cpu_data->icache.linesz;

	cpu_data->icache.asid_mask	= 0x3fc;
	cpu_data->icache.idx_mask	= 0x1fe0;
	cpu_data->icache.epn_mask	= 0xffffe000;
	cpu_data->icache.flags		= 0;

	/*
	 * Next, setup some sane values for the D-cache.
	 *
	 * On the SH5, these are pretty consistent with the I-cache settings,
	 * so we just copy over the existing definitions.. these can be fixed
	 * up later, especially if we add runtime CPU probing.
	 *
	 * Though in the meantime it saves us from having to duplicate all of
	 * the above definitions..
	 */
	cpu_data->dcache		= cpu_data->icache;

	/*
	 * Setup any cache-related flags here
	 */
#if defined(CONFIG_DCACHE_WRITE_THROUGH)
	set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
#elif defined(CONFIG_DCACHE_WRITE_BACK)
	set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
#endif

	/*
	 * We also need to reserve a slot for the D-cache in the DTLB, so we
	 * do this now ..
	 */
	dtlb_cache_slot			= sh64_get_wired_dtlb_entry();

	return 0;
}
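
/*
 * For reference: the values set up above describe a 4-way, 256-set cache
 * with 32-byte lines (which is what entry_shift == 5 and way_shift == 13
 * imply), i.e. 8 Kbytes per way and 32 Kbytes in total, so way_ofs works
 * out as 256 * 32 = 0x2000.
 */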

#ifdef CONFIG_DCACHE_DISABLED
#define sh64_dcache_purge_all()					do { } while (0)
#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr)	do { } while (0)
#define sh64_dcache_purge_user_range(mm, start, end)		do { } while (0)
#define sh64_dcache_purge_phy_page(paddr)			do { } while (0)
#define sh64_dcache_purge_virt_page(mm, eaddr)			do { } while (0)
#define sh64_dcache_purge_kernel_range(start, end)		do { } while (0)
#define sh64_dcache_wback_current_user_range(start, end)	do { } while (0)
#endif

/*##########################################################################*/

/* From here onwards, a rewrite of the implementation,
   by Richard.Curnow@superh.com.

   The major changes in this compared to the old version are:
   1. use more selective purging through OCBP instead of using ALLOCO to purge
      by natural replacement.  This avoids purging out unrelated cache lines
      that happen to be in the same set.
   2. exploit the APIs copy_user_page and clear_user_page better
   3. be more selective about I-cache purging, in particular use
      invalidate_all more sparingly.

   */

/*##########################################################################
			      SUPPORT FUNCTIONS
  ##########################################################################*/

/****************************************************************************/
/* The following group of functions deal with mapping and unmapping a
   temporary page into the DTLB slot that has been set aside for our
   exclusive use. */
/* In order to accomplish this, we use the generic interface for adding and
   removing a wired slot entry as defined in arch/sh64/mm/tlb.c */
/****************************************************************************/

static unsigned long slot_own_flags;

static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
{
	local_irq_save(slot_own_flags);
	sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
}

static inline void sh64_teardown_dtlb_cache_slot(void)
{
	sh64_teardown_tlb_slot(dtlb_cache_slot);
	local_irq_restore(slot_own_flags);
}
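
/* Typical usage of the pair above (see e.g.
   sh64_dcache_purge_coloured_phy_page() further down):

	sh64_setup_dtlb_cache_slot(alias_eaddr, get_asid(), paddr);
	... purge or write back through alias_eaddr ...
	sh64_teardown_dtlb_cache_slot();

   Since the saved IRQ flags live in the static 'slot_own_flags', the pair is
   not re-entrant: interrupts stay disabled for the whole setup/teardown
   window, which also keeps other users away from the single wired slot in
   the meantime. */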

/****************************************************************************/

#ifndef CONFIG_ICACHE_DISABLED

static void __inline__ sh64_icache_inv_all(void)
{
	unsigned long long addr, flag, data;
	unsigned int flags;

	addr = ICCR0;
	flag = ICCR0_ICI;
	data = 0;

	/* Make this a critical section for safety (probably not strictly
	   necessary.) */
	local_irq_save(flags);

	/* Without %1 it gets inexplicably wrong */
	asm volatile("getcfg %3, 0, %0\n\t"
		     "or %0, %2, %0\n\t"
		     "putcfg %3, 0, %0\n\t"
		     "synci"
		     : "=&r" (data)
		     : "0" (data), "r" (flag), "r" (addr));

	local_irq_restore(flags);
}

static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
{
	/* Invalidate range of addresses [start,end] from the I-cache, where
	   the addresses lie in the kernel superpage. */

	unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
	aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
	aligned_start &= L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
#if (NEFF == 32)
	ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
	while (addr <= ullend) {
		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
}

static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
{
	/* If we get called, we know that vma->vm_flags contains VM_EXEC.
	   Also, eaddr is page-aligned. */

	unsigned long long addr, end_addr;
	unsigned long flags = 0;
	unsigned long running_asid, vma_asid;
	addr = eaddr;
	end_addr = addr + PAGE_SIZE;

	/* Check whether we can use the current ASID for the I-cache
	   invalidation.  For example, if we're called via
	   access_process_vm->flush_cache_page->here, (e.g. when reading from
	   /proc), 'running_asid' will be that of the reader, not of the
	   victim.

	   Also, note the risk that we might get pre-empted between the ASID
	   compare and blocking IRQs, and before we regain control, the
	   pid->ASID mapping changes.  However, the whole cache will get
	   invalidated when the mapping is renewed, so the worst that can
	   happen is that the loop below ends up invalidating somebody else's
	   cache entries.
	*/

	running_asid = get_asid();
	vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK);
	if (running_asid != vma_asid) {
		local_irq_save(flags);
		switch_and_save_asid(vma_asid);
	}
	while (addr < end_addr) {
		/* Worth unrolling a little */
		asm __volatile__("icbi %0, 0" : : "r" (addr));
		asm __volatile__("icbi %0, 32" : : "r" (addr));
		asm __volatile__("icbi %0, 64" : : "r" (addr));
		asm __volatile__("icbi %0, 96" : : "r" (addr));
		addr += 128;
	}
	if (running_asid != vma_asid) {
		switch_and_save_asid(running_asid);
		local_irq_restore(flags);
	}
}

/****************************************************************************/

static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
			unsigned long start, unsigned long end)
{
	/* Used for invalidating big chunks of I-cache, i.e. assume the range
	   is whole pages.  If 'start' or 'end' is not page aligned, the code
	   is conservative and invalidates to the ends of the enclosing pages.
	   This is functionally OK, just a performance loss. */

	/* See the comments below in sh64_dcache_purge_user_range() regarding
	   the choice of algorithm.  However, for the I-cache option (2) isn't
	   available because there are no physical tags so aliases can't be
	   resolved.  The icbi instruction has to be used through the user
	   mapping.  Because icbi is cheaper than ocbp on a cache hit, it is
	   worth using the selective code for a larger range than would make
	   sense for the D-cache.  Just assume 64 for now as a working
	   figure.
	*/

	int n_pages;

	if (!mm)
		return;

	n_pages = ((end - start) >> PAGE_SHIFT);
	if (n_pages >= 64) {
		sh64_icache_inv_all();
	} else {
		unsigned long aligned_start;
		unsigned long eaddr;
		unsigned long after_last_page_start;
		unsigned long mm_asid, current_asid;
		unsigned long long flags = 0ULL;

		mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
		current_asid = get_asid();

		if (mm_asid != current_asid) {
			/* Switch ASID and run the invalidate loop under cli */
			local_irq_save(flags);
			switch_and_save_asid(mm_asid);
		}

		aligned_start = start & PAGE_MASK;
		after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);

		while (aligned_start < after_last_page_start) {
			struct vm_area_struct *vma;
			unsigned long vma_end;
			vma = find_vma(mm, aligned_start);
			if (!vma || (aligned_start < vma->vm_start)) {
				/* No vma here, or we're in a hole below the
				   next vma: step forward a page at a time to
				   avoid getting stuck in an error
				   condition. */
				aligned_start += PAGE_SIZE;
				continue;
			}
			vma_end = vma->vm_end;
			if (vma->vm_flags & VM_EXEC) {
				/* Executable */
				eaddr = aligned_start;
				while (eaddr < vma_end) {
					sh64_icache_inv_user_page(vma, eaddr);
					eaddr += PAGE_SIZE;
				}
			}
			aligned_start = vma->vm_end; /* Skip to start of next region */
		}
		if (mm_asid != current_asid) {
			switch_and_save_asid(current_asid);
			local_irq_restore(flags);
		}
	}
}

static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
						unsigned long start, int len)
{

	/* Invalidate a small range of user context I-cache, not necessarily
	   page (or even cache-line) aligned. */

	unsigned long long eaddr = start;
	unsigned long long eaddr_end = start + len;
	unsigned long current_asid, mm_asid;
	unsigned long long flags;
	unsigned long long epage_start;

	/* Since this is used inside ptrace, the ASID in the mm context
	   typically won't match current_asid.  We'll have to switch ASID to
	   do this.  For safety, and given that the range will be small, do
	   all this under cli.

	   Note, there is a hazard that the ASID in mm->context is no longer
	   actually associated with mm, i.e. if the mm->context has started a
	   new cycle since mm was last active.  However, this is just a
	   performance issue: all that happens is that we invalidate lines
	   belonging to another mm, so the owning process has to refill them
	   when that mm goes live again.  mm itself can't have any cache
	   entries because there will have been a flush_cache_all when the
	   new mm->context cycle started. */

	/* Align to start of cache line.  Otherwise, suppose len==8 and start
	   was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
	eaddr = start & L1_CACHE_ALIGN_MASK;
	eaddr_end = start + len;

	local_irq_save(flags);
	mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
	current_asid = switch_and_save_asid(mm_asid);

	epage_start = eaddr & PAGE_MASK;

	while (eaddr < eaddr_end)
	{
		asm __volatile__("icbi %0, 0" : : "r" (eaddr));
		eaddr += L1_CACHE_BYTES;
	}
	switch_and_save_asid(current_asid);
	local_irq_restore(flags);
}

static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
{
	/* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
	   cache hit on the virtual tag the instruction ends there, without a
	   TLB lookup.
	*/

	unsigned long long aligned_start;
	unsigned long long ull_end;
	unsigned long long addr;

	ull_end = end;

	/* Just invalidate over the range using the natural addresses.  TLB
	   miss handling will be OK (TBC).  Since it's for the current
	   process, either we're already in the right ASID context, or the
	   ASIDs have been recycled since we were last active in which case
	   we might just invalidate another process's I-cache entries : no
	   worries, just a performance drop for it. */
	aligned_start = start & L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
	while (addr < ull_end) {
		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
		asm __volatile__ ("nop");
		asm __volatile__ ("nop");
		addr += L1_CACHE_BYTES;
	}
}

#endif /* !CONFIG_ICACHE_DISABLED */

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

/* Buffer used as the target of alloco instructions to purge data from cache
   sets by natural eviction. -- RPC */
#define DUMMY_ALLOCO_AREA_SIZE (L1_CACHE_SIZE_BYTES + (1024 * 4))
static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };

/****************************************************************************/

static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
{
	/* Purge all ways in a particular block of sets, specified by the base
	   set number and number of sets.  Can handle wrap-around, if that's
	   needed. */

	int dummy_buffer_base_set;
	unsigned long long eaddr, eaddr0, eaddr1;
	int j;
	int set_offset;

	dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
	set_offset = sets_to_purge_base - dummy_buffer_base_set;

	for (j = 0; j < n_sets; j++, set_offset++) {
		set_offset &= (cpu_data->dcache.sets - 1);
		eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);

		/* Do one alloco which hits the required set per cache way.
		   For write-back mode, this will purge the #ways resident
		   lines.  There's little point unrolling this loop because
		   the allocos stall more if they're too close together. */
		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
		for (eaddr = eaddr0; eaddr < eaddr1; eaddr += cpu_data->dcache.way_ofs) {
			asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
			asm __volatile__ ("synco"); /* TAKum03020 */
		}

		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
		for (eaddr = eaddr0; eaddr < eaddr1; eaddr += cpu_data->dcache.way_ofs) {
			/* Load from each address.  Required because alloco is
			   a NOP if the cache is write-through.  Write-through
			   is a config option. */
			if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
				*(volatile unsigned char *)(int)eaddr;
		}
	}

	/* Don't use OCBI to invalidate the lines.  That costs cycles
	   directly.  If the dummy block is just left resident, it will
	   naturally get evicted as required. */

	return;
}

/****************************************************************************/
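
/* For example: with the geometry set up in sh64_cache_init() (256 sets, 4
   ways, way_ofs == 0x2000), a full purge via sh64_dcache_purge_sets(0,
   cpu_data->dcache.sets) walks 256 sets and issues 4 allocos per set, i.e.
   1024 allocos to displace the whole 32 Kbyte cache through the dummy
   buffer. */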

static void sh64_dcache_purge_all(void)
{
	/* Purge the entire contents of the dcache.  The most efficient way
	   to achieve this is to use alloco instructions on a region of
	   unused memory equal in size to the cache, thereby causing the
	   current contents to be discarded by natural eviction.  The
	   alternative, namely reading every tag, setting up a mapping for
	   the corresponding page and doing an OCBP for the line, would be
	   much more expensive.
	*/

	sh64_dcache_purge_sets(0, cpu_data->dcache.sets);

	return;

}

/****************************************************************************/

static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
{
	/* Purge the range of addresses [start,end] from the D-cache.  The
	   addresses lie in the superpage mapping.  There's no harm if we
	   overpurge at either end - just a small performance loss. */
	unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
	aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
	aligned_start &= L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
#if (NEFF == 32)
	ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
	while (addr <= ullend) {
		asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
	return;
}

/* Assumes this address (and the (2**n_synbits) pages up from it) aren't used
   for anything else in the kernel */
#define MAGIC_PAGE0_START 0xffffffffec000000ULL

static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
{
	/* Purge the physical page 'paddr' from the cache.  It's known that
	   any cache lines requiring attention have the same page colour as
	   the address 'eaddr'.

	   This relies on the fact that the D-cache matches on physical tags
	   when no virtual tag matches.  So we create an alias for the
	   original page and purge through that.  (Alternatively, we could
	   have done this by switching ASID to match the original mapping and
	   purged through that, but that involves ASID switching cost +
	   probably a TLBMISS + refill anyway.)
	*/

	unsigned long long magic_page_start;
	unsigned long long magic_eaddr, magic_eaddr_end;

	magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */

	sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);

	magic_eaddr = magic_page_start;
	magic_eaddr_end = magic_eaddr + PAGE_SIZE;
	while (magic_eaddr < magic_eaddr_end) {
		/* Little point in unrolling this loop - the OCBPs are
		   blocking and won't go any quicker (i.e. the loop overhead
		   is parallel to part of the OCBP execution.) */
		asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
		magic_eaddr += L1_CACHE_BYTES;
	}

	sh64_teardown_dtlb_cache_slot();
}

/****************************************************************************/
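
/* Unlike the coloured variant above, sh64_dcache_purge_phy_page() below has
   no information about the colour(s) the page may previously have been
   mapped at, so it purges the page through an alias of every possible
   colour, i.e. (1 << CACHE_OC_N_SYNBITS) of them. */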

static void sh64_dcache_purge_phy_page(unsigned long paddr)
{
	/* Purge a page given its physical start address, by creating a
	   temporary 1 page mapping and purging across that.  Even if we know
	   the virtual address (& vma or mm) of the page, the method here is
	   more elegant because it avoids issues of coping with page faults
	   on the purge instructions (i.e. no special-case code required in
	   the critical path in the TLB miss handling).
	*/

	unsigned long long eaddr_start, eaddr, eaddr_end;
	int i;

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */

	eaddr_start = MAGIC_PAGE0_START;
	for (i = 0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
		sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);

		eaddr = eaddr_start;
		eaddr_end = eaddr + PAGE_SIZE;
		while (eaddr < eaddr_end) {
			asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
			eaddr += L1_CACHE_BYTES;
		}

		sh64_teardown_dtlb_cache_slot();
		eaddr_start += PAGE_SIZE;
	}
}

static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
				unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pte_t entry;
	spinlock_t *ptl;
	unsigned long paddr;

	if (!mm)
		return; /* No way to find physical address of page */

	pgd = pgd_offset(mm, addr);
	if (pgd_bad(*pgd))
		return;

	pmd = pmd_offset(pgd, addr);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	do {
		entry = *pte;
		if (pte_none(entry) || !pte_present(entry))
			continue;
		paddr = pte_val(entry) & PAGE_MASK;
		sh64_dcache_purge_coloured_phy_page(paddr, addr);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);
}
/****************************************************************************/

static void sh64_dcache_purge_user_range(struct mm_struct *mm,
				unsigned long start, unsigned long end)
{
	/* There are at least 5 choices for the implementation of this, with
	   pros (+), cons(-), comments(*):

	   1. ocbp each line in the range through the original user's ASID
	      + no lines spuriously evicted
	      - tlbmiss handling (must either handle faults on demand => extra
		special-case code in the tlbmiss critical path, or map the
		page in advance => flush_tlb_range in advance to avoid
		multiple hits)
	      - ASID switching
	      - expensive for large ranges

	   2. temporarily map each page in the range to a special effective
	      address and ocbp through the temporary mapping; relies on the
	      fact that SH-5 OCB* always do TLB lookup and match on ptags (they
	      never look at the etags)
	      + no spurious evictions
	      - expensive for large ranges
	      * surely cheaper than (1)

	   3. walk all the lines in the cache, check the tags, if a match
	      occurs create a page mapping to ocbp the line through
	      + no spurious evictions
	      - tag inspection overhead
	      - (especially for small ranges)
	      - potential cost of setting up/tearing down page mapping for
		every line that matches the range
	      * cost partly independent of range size

	   4. walk all the lines in the cache, check the tags, if a match
	      occurs use 4 * alloco to purge the line (+3 other probably
	      innocent victims) by natural eviction
	      + no tlb mapping overheads
	      - spurious evictions
	      - tag inspection overhead

	   5. implement like flush_cache_all
	      + no tag inspection overhead
	      - spurious evictions
	      - bad for small ranges

	   (1) can be ruled out as more expensive than (2).  (2) appears best
	   for small ranges.  The choice between (3), (4) and (5) for large
	   ranges and the range size for the large/small boundary need
	   benchmarking to determine.

	   For now use approach (2) for small ranges and (5) for large ones.
	   */
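
	/* The '#if 1' below selects approach (5) (sh64_dcache_purge_all) for
	   the large-range case; the disabled '#else' branch is a sketch of
	   the tag-inspection style of approaches (3)/(4), kept for
	   reference. */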

	int n_pages;

	n_pages = ((end - start) >> PAGE_SHIFT);
	if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
#if 1
		sh64_dcache_purge_all();
#else
		unsigned long long set, way;
		unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
		for (set = 0; set < cpu_data->dcache.sets; set++) {
			unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
			for (way = 0; way < cpu_data->dcache.ways; way++) {
				unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
				unsigned long long tag0;
				unsigned long line_valid;

				asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
				line_valid = tag0 & SH_CACHE_VALID;
				if (line_valid) {
					unsigned long cache_asid;
					unsigned long epn;

					cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
					/* The next line needs some
					   explanation.  The virtual tags
					   encode bits [31:13] of the virtual
					   address, bit [12] of the 'tag'
					   being implied by the cache set
					   index. */
					epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift);

					if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
						/* TODO : could optimise this
						   call by batching multiple
						   adjacent sets together. */
						sh64_dcache_purge_sets(set, 1);
						break; /* Don't waste time inspecting other ways for this set */
					}
				}
			}
		}
#endif
	} else {
		/* Small range, covered by a single page table page */
		start &= PAGE_MASK;	/* should already be so */
		end = PAGE_ALIGN(end);	/* should already be so */
		sh64_dcache_purge_user_pages(mm, start, end);
	}
	return;
}

static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
{
	unsigned long long aligned_start;
	unsigned long long ull_end;
	unsigned long long addr;

	ull_end = end;

	/* Just wback over the range using the natural addresses.  TLB miss
	   handling will be OK (TBC) : the range has just been written to by
	   the signal frame setup code, so the PTEs must exist.

	   Note, if we have CONFIG_PREEMPT and get preempted inside this loop,
	   it doesn't matter, even if the pid->ASID mapping changes whilst
	   we're away.  In that case the cache will have been flushed when the
	   mapping was renewed.  So the writebacks below will be nugatory (and
	   we'll doubtless have to fault the TLB entry/ies in again with the
	   new ASID), but it's a rare case.
	*/
	aligned_start = start & L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
	while (addr < ull_end) {
		asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
}

/****************************************************************************/

/* These *MUST* lie in an area of virtual address space that's otherwise
   unused. */
#define UNIQUE_EADDR_START 0xe0000000UL
#define UNIQUE_EADDR_END   0xe8000000UL
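
/* The window above is 128 Mbytes of effective address space; each call to
   sh64_make_unique_eaddr() below consumes PAGE_SIZE << CACHE_OC_N_SYNBITS
   bytes of it, and the allocator simply purges the D-cache and wraps back to
   the start when it runs off the end. */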

static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
{
	/* Given a physical address paddr, and a user virtual address
	   user_eaddr which will eventually be mapped to it, create a one-off
	   kernel-private eaddr mapped to the same paddr.  This is used for
	   creating special destination pages for copy_user_page and
	   clear_user_page */

	static unsigned long current_pointer = UNIQUE_EADDR_START;
	unsigned long coloured_pointer;

	if (current_pointer == UNIQUE_EADDR_END) {
		sh64_dcache_purge_all();
		current_pointer = UNIQUE_EADDR_START;
	}

	coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
	sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);

	current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);

	return coloured_pointer;
}

/****************************************************************************/

static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
{
	void *coloured_to;

	/* Discard any existing cache entries of the wrong colour.  These are
	   present quite often, if the kernel has recently used the page
	   internally, then given it up, then it's been allocated to the
	   user. */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
	sh64_page_copy(from, coloured_to);

	sh64_teardown_dtlb_cache_slot();
}

static void sh64_clear_user_page_coloured(void *to, unsigned long address)
{
	void *coloured_to;

	/* Discard any existing kernel-originated lines of the wrong colour
	   (as above) */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
	sh64_page_clear(coloured_to);

	sh64_teardown_dtlb_cache_slot();
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

/*##########################################################################
			   EXTERNALLY CALLABLE API.
  ##########################################################################*/

/* These functions are described in Documentation/cachetlb.txt.
   Each one of these functions varies in behaviour depending on whether the
   I-cache and/or D-cache are configured out.

   Note that the Linux term 'flush' corresponds to what is termed 'purge' in
   the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
   invalidate the cache lines, and 'invalidate' for the I-cache.
   */

#undef FLUSH_TRACE

void flush_cache_all(void)
{
	/* Invalidate the entire contents of both caches, after writing back
	   to memory any dirty data from the D-cache. */
	sh64_dcache_purge_all();
	sh64_icache_inv_all();
}

/****************************************************************************/

void flush_cache_mm(struct mm_struct *mm)
{
	/* Invalidate an entire user-address space from both caches, after
	   writing back dirty data (e.g. for shared mmap etc). */

	/* This could be coded selectively by inspecting all the tags then
	   doing 4*alloco on any set containing a match (as for
	   flush_cache_range), but fork/exit/execve (where this is called
	   from) are expensive anyway. */

	/* Have to do a purge here, despite the comments re I-cache below.
	   There could be odd-coloured dirty data associated with the mm
	   still in the cache - if this gets written out through natural
	   eviction after the kernel has reused the page there will be chaos.
	*/

	sh64_dcache_purge_all();

	/* The mm being torn down won't ever be active again, so any Icache
	   lines tagged with its ASID won't be visible for the rest of the
	   lifetime of this ASID cycle.  Before the ASID gets reused, there
	   will be a flush_cache_all.  Hence we don't need to touch the
	   I-cache.  This is similar to the lack of action needed in
	   flush_tlb_mm - see fault.c. */
}

/****************************************************************************/

void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
		       unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;

	/* Invalidate (from both caches) the range [start,end) of virtual
	   addresses from the user address space specified by mm, after
	   writing back any dirty data.

	   Note, 'end' is 1 byte beyond the end of the range to flush. */

	sh64_dcache_purge_user_range(mm, start, end);
	sh64_icache_inv_user_page_range(mm, start, end);
}

/****************************************************************************/

void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn)
{
	/* Invalidate any entries in either cache for the vma within the user
	   address space vma->vm_mm for the page starting at virtual address
	   'eaddr'.  This seems to be used primarily in breaking COW.  Note,
	   the I-cache must be searched too in case the page in question is
	   both writable and being executed from (e.g. stack trampolines.)

	   Note, this is called with pte lock held.
	*/

	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);

	if (vma->vm_flags & VM_EXEC) {
		sh64_icache_inv_user_page(vma, eaddr);
	}
}

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

void copy_user_page(void *to, void *from, unsigned long address, struct page *page)
{
	/* 'from' and 'to' are kernel virtual addresses (within the superpage
	   mapping of the physical RAM).  'address' is the user virtual
	   address where the copy 'to' will be mapped afterwards.  This
	   allows a custom mapping to be used to ensure that the new copy is
	   placed in the right cache sets for the user to see it without
	   having to bounce it out via memory.  Note however : the call to
	   flush_page_to_ram in (generic)/mm/memory.c:(break_cow) undoes all
	   this good work in that one very important case!

	   TBD : can we guarantee that on every call, any cache entries for
	   'from' are in the same colour sets as 'address' also?  i.e. is
	   this always used just to deal with COW?  (I suspect not). */

	/* There are two possibilities here for when the page 'from' was last
	   accessed:
	   * by the kernel : this is OK, no purge required.
	   * by the/a user (e.g. for break_COW) : need to purge.

	   If the potential user mapping at 'address' is the same colour as
	   'from' there is no need to purge any cache lines from the 'from'
	   page mapped into cache sets of colour 'address'.  (The copy will
	   be accessing the page through 'from').
	*/
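
	/* In short: purge stale 'from' lines of colour 'address' only when
	   'from' and 'address' differ in colour, then copy either directly
	   (when 'to' already has the colour of 'address') or through a
	   congruent temporary alias. */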

	if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) {
		sh64_dcache_purge_coloured_phy_page(__pa(from), address);
	}

	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
		/* No synonym problem on destination */
		sh64_page_copy(from, to);
	} else {
		sh64_copy_user_page_coloured(to, from, address);
	}

	/* Note, don't need to flush 'from' page from the cache again - it's
	   done anyway by the generic code */
}

void clear_user_page(void *to, unsigned long address, struct page *page)
{
	/* 'to' is a kernel virtual address (within the superpage
	   mapping of the physical RAM).  'address' is the user virtual
	   address where the 'to' page will be mapped afterwards.  This
	   allows a custom mapping to be used to ensure that the new copy is
	   placed in the right cache sets for the user to see it without
	   having to bounce it out via memory.
	*/

	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
		/* No synonym problem on destination */
		sh64_page_clear(to);
	} else {
		sh64_clear_user_page_coloured(to, address);
	}
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

void flush_dcache_page(struct page *page)
{
	sh64_dcache_purge_phy_page(page_to_phys(page));
	wmb();
}

/****************************************************************************/

void flush_icache_range(unsigned long start, unsigned long end)
{
	/* Flush the range [start,end] of kernel virtual address space from
	   the I-cache.  The corresponding range must be purged from the
	   D-cache also because the SH-5 doesn't have cache snooping between
	   the caches.  The addresses will be visible through the superpage
	   mapping, therefore it's guaranteed that there are no cache entries
	   for the range in cache sets of the wrong colour.

	   Primarily used for cohering the I-cache after a module has
	   been loaded. */

	/* We also make sure to purge the same range from the D-cache since
	   flush_page_to_ram() won't be doing this for us! */

	sh64_dcache_purge_kernel_range(start, end);
	wmb();
	sh64_icache_inv_kernel_range(start, end);
}

/****************************************************************************/

void flush_icache_user_range(struct vm_area_struct *vma,
			struct page *page, unsigned long addr, int len)
{
	/* Flush the range of user (defined by vma->vm_mm) address space
	   starting at 'addr' for 'len' bytes from the cache.  The range does
	   not straddle a page boundary, the unique physical page containing
	   the range is 'page'.  This seems to be used mainly for invalidating
	   an address range following a poke into the program text through the
	   ptrace() call from another process (e.g. for BRK instruction
	   insertion). */

	sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
	mb();

	if (vma->vm_flags & VM_EXEC) {
		sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
	}
}

/*##########################################################################
			ARCH/SH64 PRIVATE CALLABLE API.
  ##########################################################################*/

void flush_cache_sigtramp(unsigned long start, unsigned long end)
{
	/* For the address range [start,end), write back the data from the
	   D-cache and invalidate the corresponding region of the I-cache for
	   the current process.  Used to flush signal trampolines on the
	   stack to make them executable. */

	sh64_dcache_wback_current_user_range(start, end);
	wmb();
	sh64_icache_inv_current_user_range(start, end);
}