/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * arch/sh64/mm/cache.c
 *
 * Original version Copyright (C) 2000, 2001  Paolo Alberelli
 * Second version Copyright (C) benedict.gaster@superh.com 2002
 * Third version Copyright Richard.Curnow@superh.com 2003
 * Hacks to third version Copyright (C) 2003 Paul Mundt
 */

/****************************************************************************/

#include <linux/init.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/threads.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/tlb.h>
#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/pgalloc.h> /* for flush_itlb_range */

#include <linux/proc_fs.h>

/* This function is in entry.S */
extern unsigned long switch_and_save_asid(unsigned long new_asid);

/* Wired TLB entry for the D-cache */
static unsigned long long dtlb_cache_slot;

/**
 * sh64_cache_init()
 *
 * This is pretty much just a straightforward clone of the SH
 * detect_cpu_and_cache_system().
 *
 * This function is responsible for setting up all of the cache
 * info dynamically as well as taking care of CPU probing and
 * setting up the relevant subtype data.
 *
 * FIXME: For the time being, we only really support the SH5-101
 * out of the box, and don't support dynamic probing for things
 * like the SH5-103 or even cut2 of the SH5-101. Implement this
 * later!
 */
int __init sh64_cache_init(void)
{
	/*
	 * First, setup some sane values for the I-cache.
	 */
	cpu_data->icache.ways		= 4;
	cpu_data->icache.sets		= 256;
	cpu_data->icache.linesz		= L1_CACHE_BYTES;

	/*
	 * FIXME: This can probably be cleaned up a bit as well.. for example,
	 * do we really need the way shift _and_ the way_step_shift ?? Judging
	 * by the existing code, I would guess no.. is there any valid reason
	 * why we need to be tracking this around?
	 */
	cpu_data->icache.way_shift	= 13;
	cpu_data->icache.entry_shift	= 5;
	cpu_data->icache.set_shift	= 4;
	cpu_data->icache.way_step_shift	= 16;
	cpu_data->icache.asid_shift	= 2;

	/*
	 * way offset = cache size / associativity, so just don't factor in
	 * associativity in the first place..
	 */
	cpu_data->icache.way_ofs	= cpu_data->icache.sets *
					  cpu_data->icache.linesz;

	cpu_data->icache.asid_mask	= 0x3fc;
	cpu_data->icache.idx_mask	= 0x1fe0;
	cpu_data->icache.epn_mask	= 0xffffe000;
	cpu_data->icache.flags		= 0;

	/*
	 * Next, setup some sane values for the D-cache.
	 *
	 * On the SH5, these are pretty consistent with the I-cache settings,
	 * so we just copy over the existing definitions.. these can be fixed
	 * up later, especially if we add runtime CPU probing.
	 *
	 * Though in the meantime it saves us from having to duplicate all of
	 * the above definitions..
	 */
	cpu_data->dcache		= cpu_data->icache;

	/*
	 * Setup any cache-related flags here
	 */
#if defined(CONFIG_DCACHE_WRITE_THROUGH)
	set_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags));
#elif defined(CONFIG_DCACHE_WRITE_BACK)
	set_bit(SH_CACHE_MODE_WB, &(cpu_data->dcache.flags));
#endif

	/*
	 * We also need to reserve a slot for the D-cache in the DTLB, so we
	 * do this now ..
	 */
	dtlb_cache_slot			= sh64_get_wired_dtlb_entry();

	return 0;
}
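
/*
 * For reference: the values set up above describe a 4-way, 256-set cache
 * with 32-byte lines (which is what entry_shift == 5 and way_shift == 13
 * imply), i.e. 8 Kbytes per way and 32 Kbytes in total, so way_ofs works
 * out as 256 * 32 = 0x2000.
 */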

#ifdef CONFIG_DCACHE_DISABLED
#define sh64_dcache_purge_all()					do { } while (0)
#define sh64_dcache_purge_coloured_phy_page(paddr, eaddr)	do { } while (0)
#define sh64_dcache_purge_user_range(mm, start, end)		do { } while (0)
#define sh64_dcache_purge_phy_page(paddr)			do { } while (0)
#define sh64_dcache_purge_virt_page(mm, eaddr)			do { } while (0)
#define sh64_dcache_purge_kernel_range(start, end)		do { } while (0)
#define sh64_dcache_wback_current_user_range(start, end)	do { } while (0)
#endif

/*##########################################################################*/

/* From here onwards, a rewrite of the implementation,
   by Richard.Curnow@superh.com.

   The major changes in this compared to the old version are:
   1. use more selective purging through OCBP instead of using ALLOCO to purge
      by natural replacement.  This avoids purging out unrelated cache lines
      that happen to be in the same set.
   2. exploit the APIs copy_user_page and clear_user_page better
   3. be more selective about I-cache purging, in particular use
      invalidate_all more sparingly.

   */

/*##########################################################################
			      SUPPORT FUNCTIONS
  ##########################################################################*/

/****************************************************************************/
/* The following group of functions deal with mapping and unmapping a
   temporary page into the DTLB slot that has been set aside for our
   exclusive use. */
/* In order to accomplish this, we use the generic interface for adding and
   removing a wired slot entry as defined in arch/sh64/mm/tlb.c */
/****************************************************************************/

static unsigned long slot_own_flags;

static inline void sh64_setup_dtlb_cache_slot(unsigned long eaddr, unsigned long asid, unsigned long paddr)
{
	local_irq_save(slot_own_flags);
	sh64_setup_tlb_slot(dtlb_cache_slot, eaddr, asid, paddr);
}

static inline void sh64_teardown_dtlb_cache_slot(void)
{
	sh64_teardown_tlb_slot(dtlb_cache_slot);
	local_irq_restore(slot_own_flags);
}
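
/* Typical usage of the pair above (see e.g.
   sh64_dcache_purge_coloured_phy_page() further down):

	sh64_setup_dtlb_cache_slot(alias_eaddr, get_asid(), paddr);
	... purge or write back through alias_eaddr ...
	sh64_teardown_dtlb_cache_slot();

   Since the saved IRQ flags live in the static 'slot_own_flags', the pair is
   not re-entrant: interrupts stay disabled for the whole setup/teardown
   window, which also keeps other users away from the single wired slot in
   the meantime. */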

/****************************************************************************/

#ifndef CONFIG_ICACHE_DISABLED

static void __inline__ sh64_icache_inv_all(void)
{
	unsigned long long addr, flag, data;
	unsigned int flags;

	addr = ICCR0;
	flag = ICCR0_ICI;
	data = 0;

	/* Make this a critical section for safety (probably not strictly
	   necessary.) */
	local_irq_save(flags);

	/* Without %1 it gets inexplicably wrong */
	asm volatile("getcfg %3, 0, %0\n\t"
		     "or %0, %2, %0\n\t"
		     "putcfg %3, 0, %0\n\t"
		     "synci"
		     : "=&r" (data)
		     : "0" (data), "r" (flag), "r" (addr));

	local_irq_restore(flags);
}

static void sh64_icache_inv_kernel_range(unsigned long start, unsigned long end)
{
	/* Invalidate range of addresses [start,end] from the I-cache, where
	   the addresses lie in the kernel superpage. */

	unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
	aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
	aligned_start &= L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
#if (NEFF == 32)
	ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
	while (addr <= ullend) {
		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
}

static void sh64_icache_inv_user_page(struct vm_area_struct *vma, unsigned long eaddr)
{
	/* If we get called, we know that vma->vm_flags contains VM_EXEC.
	   Also, eaddr is page-aligned. */

	unsigned long long addr, end_addr;
	unsigned long flags = 0;
	unsigned long running_asid, vma_asid;
	addr = eaddr;
	end_addr = addr + PAGE_SIZE;

	/* Check whether we can use the current ASID for the I-cache
	   invalidation.  For example, if we're called via
	   access_process_vm->flush_cache_page->here, (e.g. when reading from
	   /proc), 'running_asid' will be that of the reader, not of the
	   victim.

	   Also, note the risk that we might get pre-empted between the ASID
	   compare and blocking IRQs, and before we regain control, the
	   pid->ASID mapping changes.  However, the whole cache will get
	   invalidated when the mapping is renewed, so the worst that can
	   happen is that the loop below ends up invalidating somebody else's
	   cache entries.
	*/

	running_asid = get_asid();
	vma_asid = (vma->vm_mm->context & MMU_CONTEXT_ASID_MASK);
	if (running_asid != vma_asid) {
		local_irq_save(flags);
		switch_and_save_asid(vma_asid);
	}
	while (addr < end_addr) {
		/* Worth unrolling a little */
		asm __volatile__("icbi %0, 0" : : "r" (addr));
		asm __volatile__("icbi %0, 32" : : "r" (addr));
		asm __volatile__("icbi %0, 64" : : "r" (addr));
		asm __volatile__("icbi %0, 96" : : "r" (addr));
		addr += 128;
	}
	if (running_asid != vma_asid) {
		switch_and_save_asid(running_asid);
		local_irq_restore(flags);
	}
}

/****************************************************************************/

static void sh64_icache_inv_user_page_range(struct mm_struct *mm,
			unsigned long start, unsigned long end)
{
	/* Used for invalidating big chunks of I-cache, i.e. assume the range
	   is whole pages.  If 'start' or 'end' is not page aligned, the code
	   is conservative and invalidates to the ends of the enclosing pages.
	   This is functionally OK, just a performance loss. */

	/* See the comments below in sh64_dcache_purge_user_range() regarding
	   the choice of algorithm.  However, for the I-cache option (2) isn't
	   available because there are no physical tags so aliases can't be
	   resolved.  The icbi instruction has to be used through the user
	   mapping.  Because icbi is cheaper than ocbp on a cache hit, it is
	   worth using the selective code for a larger range than would make
	   sense for the D-cache.  Just assume 64 for now as a working
	   figure.
	*/

	int n_pages;

	if (!mm)
		return;

	n_pages = ((end - start) >> PAGE_SHIFT);
	if (n_pages >= 64) {
		sh64_icache_inv_all();
	} else {
		unsigned long aligned_start;
		unsigned long eaddr;
		unsigned long after_last_page_start;
		unsigned long mm_asid, current_asid;
		unsigned long long flags = 0ULL;

		mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
		current_asid = get_asid();

		if (mm_asid != current_asid) {
			/* Switch ASID and run the invalidate loop under cli */
			local_irq_save(flags);
			switch_and_save_asid(mm_asid);
		}

		aligned_start = start & PAGE_MASK;
		after_last_page_start = PAGE_SIZE + ((end - 1) & PAGE_MASK);

		while (aligned_start < after_last_page_start) {
			struct vm_area_struct *vma;
			unsigned long vma_end;
			vma = find_vma(mm, aligned_start);
			if (!vma || (aligned_start < vma->vm_start)) {
				/* No vma here, or we're in a hole below the
				   next vma: step forward a page at a time to
				   avoid getting stuck in an error
				   condition. */
				aligned_start += PAGE_SIZE;
				continue;
			}
			vma_end = vma->vm_end;
			if (vma->vm_flags & VM_EXEC) {
				/* Executable */
				eaddr = aligned_start;
				while (eaddr < vma_end) {
					sh64_icache_inv_user_page(vma, eaddr);
					eaddr += PAGE_SIZE;
				}
			}
			aligned_start = vma->vm_end; /* Skip to start of next region */
		}
		if (mm_asid != current_asid) {
			switch_and_save_asid(current_asid);
			local_irq_restore(flags);
		}
	}
}

static void sh64_icache_inv_user_small_range(struct mm_struct *mm,
						unsigned long start, int len)
{

	/* Invalidate a small range of user context I-cache, not necessarily
	   page (or even cache-line) aligned. */

	unsigned long long eaddr = start;
	unsigned long long eaddr_end = start + len;
	unsigned long current_asid, mm_asid;
	unsigned long long flags;
	unsigned long long epage_start;

	/* Since this is used inside ptrace, the ASID in the mm context
	   typically won't match current_asid.  We'll have to switch ASID to
	   do this.  For safety, and given that the range will be small, do
	   all this under cli.

	   Note, there is a hazard that the ASID in mm->context is no longer
	   actually associated with mm, i.e. if the mm->context has started a
	   new cycle since mm was last active.  However, this is just a
	   performance issue: all that happens is that we invalidate lines
	   belonging to another mm, so the owning process has to refill them
	   when that mm goes live again.  mm itself can't have any cache
	   entries because there will have been a flush_cache_all when the
	   new mm->context cycle started. */

	/* Align to start of cache line.  Otherwise, suppose len==8 and start
	   was at 32N+28 : the last 4 bytes wouldn't get invalidated. */
	eaddr = start & L1_CACHE_ALIGN_MASK;
	eaddr_end = start + len;

	local_irq_save(flags);
	mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
	current_asid = switch_and_save_asid(mm_asid);

	epage_start = eaddr & PAGE_MASK;

	while (eaddr < eaddr_end)
	{
		asm __volatile__("icbi %0, 0" : : "r" (eaddr));
		eaddr += L1_CACHE_BYTES;
	}
	switch_and_save_asid(current_asid);
	local_irq_restore(flags);
}

static void sh64_icache_inv_current_user_range(unsigned long start, unsigned long end)
{
	/* The icbi instruction never raises ITLBMISS.  i.e. if there's not a
	   cache hit on the virtual tag the instruction ends there, without a
	   TLB lookup.
	*/

	unsigned long long aligned_start;
	unsigned long long ull_end;
	unsigned long long addr;

	ull_end = end;

	/* Just invalidate over the range using the natural addresses.  TLB
	   miss handling will be OK (TBC).  Since it's for the current
	   process, either we're already in the right ASID context, or the
	   ASIDs have been recycled since we were last active in which case
	   we might just invalidate another process's I-cache entries : no
	   worries, just a performance drop for it. */
	aligned_start = start & L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
	while (addr < ull_end) {
		asm __volatile__ ("icbi %0, 0" : : "r" (addr));
		asm __volatile__ ("nop");
		asm __volatile__ ("nop");
		addr += L1_CACHE_BYTES;
	}
}

#endif /* !CONFIG_ICACHE_DISABLED */

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

/* Buffer used as the target of alloco instructions to purge data from cache
   sets by natural eviction. -- RPC */
#define DUMMY_ALLOCO_AREA_SIZE (L1_CACHE_SIZE_BYTES + (1024 * 4))
static unsigned char dummy_alloco_area[DUMMY_ALLOCO_AREA_SIZE] __cacheline_aligned = { 0, };

/****************************************************************************/

static void __inline__ sh64_dcache_purge_sets(int sets_to_purge_base, int n_sets)
{
	/* Purge all ways in a particular block of sets, specified by the base
	   set number and number of sets.  Can handle wrap-around, if that's
	   needed. */

	int dummy_buffer_base_set;
	unsigned long long eaddr, eaddr0, eaddr1;
	int j;
	int set_offset;

	dummy_buffer_base_set = ((int)&dummy_alloco_area & cpu_data->dcache.idx_mask) >> cpu_data->dcache.entry_shift;
	set_offset = sets_to_purge_base - dummy_buffer_base_set;

	for (j = 0; j < n_sets; j++, set_offset++) {
		set_offset &= (cpu_data->dcache.sets - 1);
		eaddr0 = (unsigned long long)dummy_alloco_area + (set_offset << cpu_data->dcache.entry_shift);

		/* Do one alloco which hits the required set per cache way.
		   For write-back mode, this will purge the #ways resident
		   lines.  There's little point unrolling this loop because
		   the allocos stall more if they're too close together. */
		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
		for (eaddr = eaddr0; eaddr < eaddr1; eaddr += cpu_data->dcache.way_ofs) {
			asm __volatile__ ("alloco %0, 0" : : "r" (eaddr));
			asm __volatile__ ("synco"); /* TAKum03020 */
		}

		eaddr1 = eaddr0 + cpu_data->dcache.way_ofs * cpu_data->dcache.ways;
		for (eaddr = eaddr0; eaddr < eaddr1; eaddr += cpu_data->dcache.way_ofs) {
			/* Load from each address.  Required because alloco is
			   a NOP if the cache is write-through.  Write-through
			   is a config option. */
			if (test_bit(SH_CACHE_MODE_WT, &(cpu_data->dcache.flags)))
				*(volatile unsigned char *)(int)eaddr;
		}
	}

	/* Don't use OCBI to invalidate the lines.  That costs cycles
	   directly.  If the dummy block is just left resident, it will
	   naturally get evicted as required. */

	return;
}

/****************************************************************************/
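
/* For example: with the geometry set up in sh64_cache_init() (256 sets, 4
   ways, way_ofs == 0x2000), a full purge via sh64_dcache_purge_sets(0,
   cpu_data->dcache.sets) walks 256 sets and issues 4 allocos per set, i.e.
   1024 allocos to displace the whole 32 Kbyte cache through the dummy
   buffer. */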

static void sh64_dcache_purge_all(void)
{
	/* Purge the entire contents of the dcache.  The most efficient way
	   to achieve this is to use alloco instructions on a region of
	   unused memory equal in size to the cache, thereby causing the
	   current contents to be discarded by natural eviction.  The
	   alternative, namely reading every tag, setting up a mapping for
	   the corresponding page and doing an OCBP for the line, would be
	   much more expensive.
	*/

	sh64_dcache_purge_sets(0, cpu_data->dcache.sets);

	return;

}

/****************************************************************************/

static void sh64_dcache_purge_kernel_range(unsigned long start, unsigned long end)
{
	/* Purge the range of addresses [start,end] from the D-cache.  The
	   addresses lie in the superpage mapping.  There's no harm if we
	   overpurge at either end - just a small performance loss. */
	unsigned long long ullend, addr, aligned_start;
#if (NEFF == 32)
	aligned_start = (unsigned long long)(signed long long)(signed long) start;
#else
#error "NEFF != 32"
#endif
	aligned_start &= L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
#if (NEFF == 32)
	ullend = (unsigned long long) (signed long long) (signed long) end;
#else
#error "NEFF != 32"
#endif
	while (addr <= ullend) {
		asm __volatile__ ("ocbp %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
	return;
}

/* Assumes this address (and the (2**n_synbits) pages up from it) aren't used
   for anything else in the kernel */
#define MAGIC_PAGE0_START 0xffffffffec000000ULL

static void sh64_dcache_purge_coloured_phy_page(unsigned long paddr, unsigned long eaddr)
{
	/* Purge the physical page 'paddr' from the cache.  It's known that
	   any cache lines requiring attention have the same page colour as
	   the address 'eaddr'.

	   This relies on the fact that the D-cache matches on physical tags
	   when no virtual tag matches.  So we create an alias for the
	   original page and purge through that.  (Alternatively, we could
	   have done this by switching ASID to match the original mapping and
	   purged through that, but that involves ASID switching cost +
	   probably a TLBMISS + refill anyway.)
	*/

	unsigned long long magic_page_start;
	unsigned long long magic_eaddr, magic_eaddr_end;

	magic_page_start = MAGIC_PAGE0_START + (eaddr & CACHE_OC_SYN_MASK);

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */

	sh64_setup_dtlb_cache_slot(magic_page_start, get_asid(), paddr);

	magic_eaddr = magic_page_start;
	magic_eaddr_end = magic_eaddr + PAGE_SIZE;
	while (magic_eaddr < magic_eaddr_end) {
		/* Little point in unrolling this loop - the OCBPs are
		   blocking and won't go any quicker (i.e. the loop overhead
		   is parallel to part of the OCBP execution.) */
		asm __volatile__ ("ocbp %0, 0" : : "r" (magic_eaddr));
		magic_eaddr += L1_CACHE_BYTES;
	}

	sh64_teardown_dtlb_cache_slot();
}

/****************************************************************************/
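
/* Unlike the coloured variant above, sh64_dcache_purge_phy_page() below has
   no information about the colour(s) the page may previously have been
   mapped at, so it purges the page through an alias of every possible
   colour, i.e. (1 << CACHE_OC_N_SYNBITS) of them. */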

static void sh64_dcache_purge_phy_page(unsigned long paddr)
{
	/* Purge a page given its physical start address, by creating a
	   temporary 1 page mapping and purging across that.  Even if we know
	   the virtual address (& vma or mm) of the page, the method here is
	   more elegant because it avoids issues of coping with page faults
	   on the purge instructions (i.e. no special-case code required in
	   the critical path in the TLB miss handling).
	*/

	unsigned long long eaddr_start, eaddr, eaddr_end;
	int i;

	/* As long as the kernel is not pre-emptible, this doesn't need to be
	   under cli/sti. */

	eaddr_start = MAGIC_PAGE0_START;
	for (i = 0; i < (1 << CACHE_OC_N_SYNBITS); i++) {
		sh64_setup_dtlb_cache_slot(eaddr_start, get_asid(), paddr);

		eaddr = eaddr_start;
		eaddr_end = eaddr + PAGE_SIZE;
		while (eaddr < eaddr_end) {
			asm __volatile__ ("ocbp %0, 0" : : "r" (eaddr));
			eaddr += L1_CACHE_BYTES;
		}

		sh64_teardown_dtlb_cache_slot();
		eaddr_start += PAGE_SIZE;
	}
}

static void sh64_dcache_purge_user_pages(struct mm_struct *mm,
				unsigned long addr, unsigned long end)
{
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	pte_t entry;
	spinlock_t *ptl;
	unsigned long paddr;

	if (!mm)
		return; /* No way to find physical address of page */

	pgd = pgd_offset(mm, addr);
	if (pgd_bad(*pgd))
		return;

	pmd = pmd_offset(pgd, addr);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return;

	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
	do {
		entry = *pte;
		if (pte_none(entry) || !pte_present(entry))
			continue;
		paddr = pte_val(entry) & PAGE_MASK;
		sh64_dcache_purge_coloured_phy_page(paddr, addr);
	} while (pte++, addr += PAGE_SIZE, addr != end);
	pte_unmap_unlock(pte - 1, ptl);
}
/****************************************************************************/

static void sh64_dcache_purge_user_range(struct mm_struct *mm,
				unsigned long start, unsigned long end)
{
	/* There are at least 5 choices for the implementation of this, with
	   pros (+), cons(-), comments(*):

	   1. ocbp each line in the range through the original user's ASID
	      + no lines spuriously evicted
	      - tlbmiss handling (must either handle faults on demand => extra
		special-case code in the tlbmiss critical path, or map the
		page in advance => flush_tlb_range in advance to avoid
		multiple hits)
	      - ASID switching
	      - expensive for large ranges

	   2. temporarily map each page in the range to a special effective
	      address and ocbp through the temporary mapping; relies on the
	      fact that SH-5 OCB* always do TLB lookup and match on ptags (they
	      never look at the etags)
	      + no spurious evictions
	      - expensive for large ranges
	      * surely cheaper than (1)

	   3. walk all the lines in the cache, check the tags, if a match
	      occurs create a page mapping to ocbp the line through
	      + no spurious evictions
	      - tag inspection overhead
	      - (especially for small ranges)
	      - potential cost of setting up/tearing down page mapping for
		every line that matches the range
	      * cost partly independent of range size

	   4. walk all the lines in the cache, check the tags, if a match
	      occurs use 4 * alloco to purge the line (+3 other probably
	      innocent victims) by natural eviction
	      + no tlb mapping overheads
	      - spurious evictions
	      - tag inspection overhead

	   5. implement like flush_cache_all
	      + no tag inspection overhead
	      - spurious evictions
	      - bad for small ranges

	   (1) can be ruled out as more expensive than (2).  (2) appears best
	   for small ranges.  The choice between (3), (4) and (5) for large
	   ranges and the range size for the large/small boundary need
	   benchmarking to determine.

	   For now use approach (2) for small ranges and (5) for large ones.
	   */
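
	/* The '#if 1' below selects approach (5) (sh64_dcache_purge_all) for
	   the large-range case; the disabled '#else' branch is a sketch of
	   the tag-inspection style of approaches (3)/(4), kept for
	   reference. */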

	int n_pages;

	n_pages = ((end - start) >> PAGE_SHIFT);
	if (n_pages >= 64 || ((start ^ (end - 1)) & PMD_MASK)) {
#if 1
		sh64_dcache_purge_all();
#else
		unsigned long long set, way;
		unsigned long mm_asid = mm->context & MMU_CONTEXT_ASID_MASK;
		for (set = 0; set < cpu_data->dcache.sets; set++) {
			unsigned long long set_base_config_addr = CACHE_OC_ADDRESS_ARRAY + (set << cpu_data->dcache.set_shift);
			for (way = 0; way < cpu_data->dcache.ways; way++) {
				unsigned long long config_addr = set_base_config_addr + (way << cpu_data->dcache.way_step_shift);
				unsigned long long tag0;
				unsigned long line_valid;

				asm __volatile__("getcfg %1, 0, %0" : "=r" (tag0) : "r" (config_addr));
				line_valid = tag0 & SH_CACHE_VALID;
				if (line_valid) {
					unsigned long cache_asid;
					unsigned long epn;

					cache_asid = (tag0 & cpu_data->dcache.asid_mask) >> cpu_data->dcache.asid_shift;
					/* The next line needs some
					   explanation.  The virtual tags
					   encode bits [31:13] of the virtual
					   address, bit [12] of the 'tag'
					   being implied by the cache set
					   index. */
					epn = (tag0 & cpu_data->dcache.epn_mask) | ((set & 0x80) << cpu_data->dcache.entry_shift);

					if ((cache_asid == mm_asid) && (start <= epn) && (epn < end)) {
						/* TODO : could optimise this
						   call by batching multiple
						   adjacent sets together. */
						sh64_dcache_purge_sets(set, 1);
						break; /* Don't waste time inspecting other ways for this set */
					}
				}
			}
		}
#endif
	} else {
		/* Small range, covered by a single page table page */
		start &= PAGE_MASK;	/* should already be so */
		end = PAGE_ALIGN(end);	/* should already be so */
		sh64_dcache_purge_user_pages(mm, start, end);
	}
	return;
}

static void sh64_dcache_wback_current_user_range(unsigned long start, unsigned long end)
{
	unsigned long long aligned_start;
	unsigned long long ull_end;
	unsigned long long addr;

	ull_end = end;

	/* Just wback over the range using the natural addresses.  TLB miss
	   handling will be OK (TBC) : the range has just been written to by
	   the signal frame setup code, so the PTEs must exist.

	   Note, if we have CONFIG_PREEMPT and get preempted inside this loop,
	   it doesn't matter, even if the pid->ASID mapping changes whilst
	   we're away.  In that case the cache will have been flushed when the
	   mapping was renewed.  So the writebacks below will be nugatory (and
	   we'll doubtless have to fault the TLB entry/ies in again with the
	   new ASID), but it's a rare case.
	*/
	aligned_start = start & L1_CACHE_ALIGN_MASK;
	addr = aligned_start;
	while (addr < ull_end) {
		asm __volatile__ ("ocbwb %0, 0" : : "r" (addr));
		addr += L1_CACHE_BYTES;
	}
}

/****************************************************************************/

/* These *MUST* lie in an area of virtual address space that's otherwise
   unused. */
#define UNIQUE_EADDR_START 0xe0000000UL
#define UNIQUE_EADDR_END   0xe8000000UL
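
/* The window above is 128 Mbytes of effective address space; each call to
   sh64_make_unique_eaddr() below consumes PAGE_SIZE << CACHE_OC_N_SYNBITS
   bytes of it, and the allocator simply purges the D-cache and wraps back to
   the start when it runs off the end. */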

static unsigned long sh64_make_unique_eaddr(unsigned long user_eaddr, unsigned long paddr)
{
	/* Given a physical address paddr, and a user virtual address
	   user_eaddr which will eventually be mapped to it, create a one-off
	   kernel-private eaddr mapped to the same paddr.  This is used for
	   creating special destination pages for copy_user_page and
	   clear_user_page */

	static unsigned long current_pointer = UNIQUE_EADDR_START;
	unsigned long coloured_pointer;

	if (current_pointer == UNIQUE_EADDR_END) {
		sh64_dcache_purge_all();
		current_pointer = UNIQUE_EADDR_START;
	}

	coloured_pointer = (current_pointer & ~CACHE_OC_SYN_MASK) | (user_eaddr & CACHE_OC_SYN_MASK);
	sh64_setup_dtlb_cache_slot(coloured_pointer, get_asid(), paddr);

	current_pointer += (PAGE_SIZE << CACHE_OC_N_SYNBITS);

	return coloured_pointer;
}

/****************************************************************************/

static void sh64_copy_user_page_coloured(void *to, void *from, unsigned long address)
{
	void *coloured_to;

	/* Discard any existing cache entries of the wrong colour.  These are
	   present quite often, if the kernel has recently used the page
	   internally, then given it up, then it's been allocated to the
	   user. */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
	sh64_page_copy(from, coloured_to);

	sh64_teardown_dtlb_cache_slot();
}

static void sh64_clear_user_page_coloured(void *to, unsigned long address)
{
	void *coloured_to;

	/* Discard any existing kernel-originated lines of the wrong colour
	   (as above) */
	sh64_dcache_purge_coloured_phy_page(__pa(to), (unsigned long) to);

	coloured_to = (void *) sh64_make_unique_eaddr(address, __pa(to));
	sh64_page_clear(coloured_to);

	sh64_teardown_dtlb_cache_slot();
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

/*##########################################################################
			   EXTERNALLY CALLABLE API.
  ##########################################################################*/

/* These functions are described in Documentation/cachetlb.txt.
   Each one of these functions varies in behaviour depending on whether the
   I-cache and/or D-cache are configured out.

   Note that the Linux term 'flush' corresponds to what is termed 'purge' in
   the sh/sh64 jargon for the D-cache, i.e. write back dirty data then
   invalidate the cache lines, and 'invalidate' for the I-cache.
   */

#undef FLUSH_TRACE

void flush_cache_all(void)
{
	/* Invalidate the entire contents of both caches, after writing back
	   to memory any dirty data from the D-cache. */
	sh64_dcache_purge_all();
	sh64_icache_inv_all();
}

/****************************************************************************/

void flush_cache_mm(struct mm_struct *mm)
{
	/* Invalidate an entire user-address space from both caches, after
	   writing back dirty data (e.g. for shared mmap etc). */

	/* This could be coded selectively by inspecting all the tags then
	   doing 4*alloco on any set containing a match (as for
	   flush_cache_range), but fork/exit/execve (where this is called
	   from) are expensive anyway. */

	/* Have to do a purge here, despite the comments re I-cache below.
	   There could be odd-coloured dirty data associated with the mm
	   still in the cache - if this gets written out through natural
	   eviction after the kernel has reused the page there will be chaos.
	*/

	sh64_dcache_purge_all();

	/* The mm being torn down won't ever be active again, so any Icache
	   lines tagged with its ASID won't be visible for the rest of the
	   lifetime of this ASID cycle.  Before the ASID gets reused, there
	   will be a flush_cache_all.  Hence we don't need to touch the
	   I-cache.  This is similar to the lack of action needed in
	   flush_tlb_mm - see fault.c. */
}

/****************************************************************************/

void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
		       unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;

	/* Invalidate (from both caches) the range [start,end) of virtual
	   addresses from the user address space specified by mm, after
	   writing back any dirty data.

	   Note, 'end' is 1 byte beyond the end of the range to flush. */

	sh64_dcache_purge_user_range(mm, start, end);
	sh64_icache_inv_user_page_range(mm, start, end);
}

/****************************************************************************/

void flush_cache_page(struct vm_area_struct *vma, unsigned long eaddr, unsigned long pfn)
{
	/* Invalidate any entries in either cache for the vma within the user
	   address space vma->vm_mm for the page starting at virtual address
	   'eaddr'.  This seems to be used primarily in breaking COW.  Note,
	   the I-cache must be searched too in case the page in question is
	   both writable and being executed from (e.g. stack trampolines.)

	   Note, this is called with pte lock held.
	*/

	sh64_dcache_purge_phy_page(pfn << PAGE_SHIFT);

	if (vma->vm_flags & VM_EXEC) {
		sh64_icache_inv_user_page(vma, eaddr);
	}
}

/****************************************************************************/

#ifndef CONFIG_DCACHE_DISABLED

void copy_user_page(void *to, void *from, unsigned long address, struct page *page)
{
	/* 'from' and 'to' are kernel virtual addresses (within the superpage
	   mapping of the physical RAM).  'address' is the user virtual
	   address where the copy 'to' will be mapped afterwards.  This
	   allows a custom mapping to be used to ensure that the new copy is
	   placed in the right cache sets for the user to see it without
	   having to bounce it out via memory.  Note however : the call to
	   flush_page_to_ram in (generic)/mm/memory.c:(break_cow) undoes all
	   this good work in that one very important case!

	   TBD : can we guarantee that on every call, any cache entries for
	   'from' are in the same colour sets as 'address' also?  i.e. is
	   this always used just to deal with COW?  (I suspect not). */

	/* There are two possibilities here for when the page 'from' was last
	   accessed:
	   * by the kernel : this is OK, no purge required.
	   * by the/a user (e.g. for break_COW) : need to purge.

	   If the potential user mapping at 'address' is the same colour as
	   'from' there is no need to purge any cache lines from the 'from'
	   page mapped into cache sets of colour 'address'.  (The copy will
	   be accessing the page through 'from').
	*/
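
	/* In short: purge stale 'from' lines of colour 'address' only when
	   'from' and 'address' differ in colour, then copy either directly
	   (when 'to' already has the colour of 'address') or through a
	   congruent temporary alias. */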

	if (((address ^ (unsigned long) from) & CACHE_OC_SYN_MASK) != 0) {
		sh64_dcache_purge_coloured_phy_page(__pa(from), address);
	}

	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
		/* No synonym problem on destination */
		sh64_page_copy(from, to);
	} else {
		sh64_copy_user_page_coloured(to, from, address);
	}

	/* Note, don't need to flush 'from' page from the cache again - it's
	   done anyway by the generic code */
}

void clear_user_page(void *to, unsigned long address, struct page *page)
{
	/* 'to' is a kernel virtual address (within the superpage
	   mapping of the physical RAM).  'address' is the user virtual
	   address where the 'to' page will be mapped afterwards.  This
	   allows a custom mapping to be used to ensure that the new copy is
	   placed in the right cache sets for the user to see it without
	   having to bounce it out via memory.
	*/

	if (((address ^ (unsigned long) to) & CACHE_OC_SYN_MASK) == 0) {
		/* No synonym problem on destination */
		sh64_page_clear(to);
	} else {
		sh64_clear_user_page_coloured(to, address);
	}
}

#endif /* !CONFIG_DCACHE_DISABLED */

/****************************************************************************/

void flush_dcache_page(struct page *page)
{
	sh64_dcache_purge_phy_page(page_to_phys(page));
	wmb();
}

/****************************************************************************/

void flush_icache_range(unsigned long start, unsigned long end)
{
	/* Flush the range [start,end] of kernel virtual address space from
	   the I-cache.  The corresponding range must be purged from the
	   D-cache also because the SH-5 doesn't have cache snooping between
	   the caches.  The addresses will be visible through the superpage
	   mapping, therefore it's guaranteed that there are no cache entries
	   for the range in cache sets of the wrong colour.

	   Primarily used for cohering the I-cache after a module has
	   been loaded. */

	/* We also make sure to purge the same range from the D-cache since
	   flush_page_to_ram() won't be doing this for us! */

	sh64_dcache_purge_kernel_range(start, end);
	wmb();
	sh64_icache_inv_kernel_range(start, end);
}

/****************************************************************************/

void flush_icache_user_range(struct vm_area_struct *vma,
			struct page *page, unsigned long addr, int len)
{
	/* Flush the range of user (defined by vma->vm_mm) address space
	   starting at 'addr' for 'len' bytes from the cache.  The range does
	   not straddle a page boundary, the unique physical page containing
	   the range is 'page'.  This seems to be used mainly for invalidating
	   an address range following a poke into the program text through the
	   ptrace() call from another process (e.g. for BRK instruction
	   insertion). */

	sh64_dcache_purge_coloured_phy_page(page_to_phys(page), addr);
	mb();

	if (vma->vm_flags & VM_EXEC) {
		sh64_icache_inv_user_small_range(vma->vm_mm, addr, len);
	}
}

/*##########################################################################
			ARCH/SH64 PRIVATE CALLABLE API.
  ##########################################################################*/

void flush_cache_sigtramp(unsigned long start, unsigned long end)
{
	/* For the address range [start,end), write back the data from the
	   D-cache and invalidate the corresponding region of the I-cache for
	   the current process.  Used to flush signal trampolines on the
	   stack to make them executable. */

	sh64_dcache_wback_current_user_range(start, end);
	wmb();
	sh64_icache_inv_current_user_range(start, end);
}