linux/arch/tile/lib/memcpy_tile64.c
/*
 * Copyright 2010 Tilera Corporation. All Rights Reserved.
 *
 *   This program is free software; you can redistribute it and/or
 *   modify it under the terms of the GNU General Public License
 *   as published by the Free Software Foundation, version 2.
 *
 *   This program is distributed in the hope that it will be useful, but
 *   WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 *   NON INFRINGEMENT.  See the GNU General Public License for
 *   more details.
 */

#include <linux/string.h>
#include <linux/smp.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <asm/fixmap.h>
#include <asm/kmap_types.h>
#include <asm/tlbflush.h>
#include <hv/hypervisor.h>
#include <arch/chip.h>


#if !CHIP_HAS_COHERENT_LOCAL_CACHE()

/* Defined in memcpy.S */
extern unsigned long __memcpy_asm(void *to, const void *from, unsigned long n);
extern unsigned long __copy_to_user_inatomic_asm(
        void __user *to, const void *from, unsigned long n);
extern unsigned long __copy_from_user_inatomic_asm(
        void *to, const void __user *from, unsigned long n);
extern unsigned long __copy_from_user_zeroing_asm(
        void *to, const void __user *from, unsigned long n);

typedef unsigned long (*memcpy_t)(void *, const void *, unsigned long);
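
/*
 * All four assembly routines share this signature, so fast_copy() below
 * can take any of them as its fallback.  Their return conventions
 * differ: __memcpy_asm() hands back the destination pointer (as an
 * unsigned long), while the user-copy variants hand back the number of
 * bytes that could not be copied.
 */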

/* Size above which to consider TLB games for performance */
#define LARGE_COPY_CUTOFF 2048
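
/*
 * Every entry point below dispatches on this cutoff: copies shorter
 * than 2048 bytes go straight to the assembly routine, while larger
 * ones try the page-at-a-time multicache path and hand any remaining
 * sub-cutoff tail back to the assembly routine.
 */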

/* Communicate to the simulator what we are trying to do. */
#define sim_allow_multiple_caching(b) \
  __insn_mtspr(SPR_SIM_CONTROL, \
   SIM_CONTROL_ALLOW_MULTIPLE_CACHING | ((b) << _SIM_CONTROL_OPERATOR_BITS))
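
/*
 * Usage: sim_allow_multiple_caching(1) brackets the start of the
 * incoherent window and sim_allow_multiple_caching(0) the end, so the
 * simulator suppresses coherence warnings only while we deliberately
 * cache the same PA in two places.
 */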

/*
 * Copy memory by briefly enabling incoherent cacheline-at-a-time mode.
 *
 * We set up our own source and destination PTEs that we fully control.
 * This is the only way to guarantee that we don't race with another
 * thread that is modifying the PTE; we can't afford to try the
 * copy_{to,from}_user() technique of catching the interrupt, since
 * we must run with interrupts disabled to avoid the risk of some
 * other code seeing the incoherent data in our cache.  (Recall that
 * our cache is indexed by PA, so even if the other code doesn't use
 * our kmap_atomic virtual addresses, they'll still hit in cache using
 * the normal VAs that aren't supposed to hit in cache.)
 */
static void memcpy_multicache(void *dest, const void *source,
                              pte_t dst_pte, pte_t src_pte, int len)
{
        int idx;
        unsigned long flags, newsrc, newdst;
        pmd_t *pmdp;
        pte_t *ptep;
        int type0, type1;
        int cpu = get_cpu();

        /*
         * Disable interrupts so that we don't recurse into memcpy()
         * in an interrupt handler, nor accidentally reference
         * the PA of the source from an interrupt routine.  Also
         * notify the simulator that we're playing games so we don't
         * generate spurious coherency warnings.
         */
        local_irq_save(flags);
        sim_allow_multiple_caching(1);

        /* Set up the new dest mapping */
        type0 = kmap_atomic_idx_push();
        idx = FIX_KMAP_BEGIN + (KM_TYPE_NR * cpu) + type0;
        newdst = __fix_to_virt(idx) + ((unsigned long)dest & (PAGE_SIZE-1));
        pmdp = pmd_offset(pud_offset(pgd_offset_k(newdst), newdst), newdst);
        ptep = pte_offset_kernel(pmdp, newdst);
        if (pte_val(*ptep) != pte_val(dst_pte)) {
                set_pte(ptep, dst_pte);
                local_flush_tlb_page(NULL, newdst, PAGE_SIZE);
        }
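
        /*
         * Note that the flush above is skipped when this fixmap slot
         * already holds the desired PTE (e.g. from the previous chunk
         * of the same copy), saving a local TLB flush on that path.
         */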

        /* Set up the new source mapping */
        type1 = kmap_atomic_idx_push();
        idx += (type0 - type1);
        src_pte = hv_pte_set_nc(src_pte);
        src_pte = hv_pte_clear_writable(src_pte);  /* be paranoid */
        newsrc = __fix_to_virt(idx) + ((unsigned long)source & (PAGE_SIZE-1));
        pmdp = pmd_offset(pud_offset(pgd_offset_k(newsrc), newsrc), newsrc);
        ptep = pte_offset_kernel(pmdp, newsrc);
        __set_pte(ptep, src_pte);   /* set_pte() would be confused by this */
        local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);
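
        /*
         * The raw __set_pte() is presumably needed because set_pte()
         * applies checks suited to ordinary kernel mappings, which this
         * deliberately non-coherent, remotely-homed mapping would trip.
         */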

        /* Actually move the data. */
        __memcpy_asm((void *)newdst, (const void *)newsrc, len);

        /*
         * Remap the source as locally-cached and not OLOC'ed so that
         * we can inval without also invaling the remote cpu's cache.
         * This also avoids known errata with inv'ing cacheable oloc data.
         */
        src_pte = hv_pte_set_mode(src_pte, HV_PTE_MODE_CACHE_NO_L3);
        src_pte = hv_pte_set_writable(src_pte); /* need write access for inv */
        __set_pte(ptep, src_pte);   /* set_pte() would be confused by this */
        local_flush_tlb_page(NULL, newsrc, PAGE_SIZE);

        /*
         * Do the actual invalidation, covering the full L2 cache line
         * at the end since __memcpy_asm() is somewhat aggressive.
         */
        __inv_buffer((void *)newsrc, len);

        /*
         * We're done: notify the simulator that all is back to normal,
         * and re-enable interrupts and pre-emption.
         */
        kmap_atomic_idx_pop();
        kmap_atomic_idx_pop();
        sim_allow_multiple_caching(0);
        local_irq_restore(flags);
        put_cpu();
}

/*
 * Identify large copies from remotely-cached memory, and copy them
 * via memcpy_multicache() if they look good, otherwise fall back
 * to the particular kind of copying passed as the memcpy_t function.
 */
static unsigned long fast_copy(void *dest, const void *source, int len,
                               memcpy_t func)
{
        /*
         * Check if it's big enough to bother with.  We may end up doing a
         * small copy via TLB manipulation if we're near a page boundary,
         * but presumably we'll make it up when we hit the second page.
         */
        while (len >= LARGE_COPY_CUTOFF) {
                int copy_size, bytes_left_on_page;
                pte_t *src_ptep, *dst_ptep;
                pte_t src_pte, dst_pte;
                struct page *src_page, *dst_page;

                /* Is the source page oloc'ed to a remote cpu? */
retry_source:
                src_ptep = virt_to_pte(current->mm, (unsigned long)source);
                if (src_ptep == NULL)
                        break;
                src_pte = *src_ptep;
                if (!hv_pte_get_present(src_pte) ||
                    !hv_pte_get_readable(src_pte) ||
                    hv_pte_get_mode(src_pte) != HV_PTE_MODE_CACHE_TILE_L3)
                        break;
                if (get_remote_cache_cpu(src_pte) == smp_processor_id())
                        break;
                src_page = pfn_to_page(pte_pfn(src_pte));
                get_page(src_page);
                if (pte_val(src_pte) != pte_val(*src_ptep)) {
                        put_page(src_page);
                        goto retry_source;
                }
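                /*
                 * The get_page()/recheck sequence above is classic
                 * lock-free stabilization: take a reference on the page,
                 * then verify the PTE did not change meanwhile, retrying
                 * if it did.  The destination path below repeats the
                 * same pattern.
                 */
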
                if (pte_huge(src_pte)) {
                        /* Adjust the PTE to correspond to a small page */
                        int pfn = pte_pfn(src_pte);
                        pfn += (((unsigned long)source & (HPAGE_SIZE-1))
                                >> PAGE_SHIFT);
                        src_pte = pfn_pte(pfn, src_pte);
                        src_pte = pte_mksmall(src_pte);
                }
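                /*
                 * The adjustment above works because a huge-page PTE
                 * names the first pfn of the huge page; adding the
                 * small-page index of 'source' within the huge page
                 * yields a PTE for exactly the small page being copied.
                 */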

                /* Is the destination page writable? */
retry_dest:
                dst_ptep = virt_to_pte(current->mm, (unsigned long)dest);
                if (dst_ptep == NULL) {
                        put_page(src_page);
                        break;
                }
                dst_pte = *dst_ptep;
                if (!hv_pte_get_present(dst_pte) ||
                    !hv_pte_get_writable(dst_pte)) {
                        put_page(src_page);
                        break;
                }
                dst_page = pfn_to_page(pte_pfn(dst_pte));
                if (dst_page == src_page) {
                        /*
                         * Source and dest are on the same page; this
                         * potentially exposes us to incoherence if any
                         * part of src and dest overlap on a cache line.
                         * Just give up rather than trying to be precise.
                         */
                        put_page(src_page);
                        break;
                }
                get_page(dst_page);
                if (pte_val(dst_pte) != pte_val(*dst_ptep)) {
                        put_page(dst_page);
                        goto retry_dest;
                }
                if (pte_huge(dst_pte)) {
                        /* Adjust the PTE to correspond to a small page */
                        int pfn = pte_pfn(dst_pte);
                        pfn += (((unsigned long)dest & (HPAGE_SIZE-1))
                                >> PAGE_SHIFT);
                        dst_pte = pfn_pte(pfn, dst_pte);
                        dst_pte = pte_mksmall(dst_pte);
                }

                /* All looks good: create a cachable PTE and copy from it */
                copy_size = len;
                bytes_left_on_page =
                        PAGE_SIZE - (((int)source) & (PAGE_SIZE-1));
                if (copy_size > bytes_left_on_page)
                        copy_size = bytes_left_on_page;
                bytes_left_on_page =
                        PAGE_SIZE - (((int)dest) & (PAGE_SIZE-1));
                if (copy_size > bytes_left_on_page)
                        copy_size = bytes_left_on_page;
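                /*
                 * In effect, copy_size = min(len, bytes left on the
                 * source page, bytes left on the dest page), so each
                 * memcpy_multicache() call stays within one page on
                 * both sides.
                 */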
                memcpy_multicache(dest, source, dst_pte, src_pte, copy_size);

                /* Release the pages */
                put_page(dst_page);
                put_page(src_page);

                /* Continue on the next page */
                dest += copy_size;
                source += copy_size;
                len -= copy_size;
        }

        return func(dest, source, len);
}
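
/*
 * Whatever remains when the loop exits (possibly the whole range, if
 * the PTE checks bailed out immediately) is handed to func(), so for
 * the user-copy variants fast_copy() returns the residual byte count
 * that the callers below pass straight through.
 */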

void *memcpy(void *to, const void *from, __kernel_size_t n)
{
        if (n < LARGE_COPY_CUTOFF)
                return (void *)__memcpy_asm(to, from, n);
        else
                return (void *)fast_copy(to, from, n, __memcpy_asm);
}

unsigned long __copy_to_user_inatomic(void __user *to, const void *from,
                                      unsigned long n)
{
        if (n < LARGE_COPY_CUTOFF)
                return __copy_to_user_inatomic_asm(to, from, n);
        else
                return fast_copy(to, from, n, __copy_to_user_inatomic_asm);
}

unsigned long __copy_from_user_inatomic(void *to, const void __user *from,
                                        unsigned long n)
{
        if (n < LARGE_COPY_CUTOFF)
                return __copy_from_user_inatomic_asm(to, from, n);
        else
                return fast_copy(to, from, n, __copy_from_user_inatomic_asm);
}

unsigned long __copy_from_user_zeroing(void *to, const void __user *from,
                                       unsigned long n)
{
        if (n < LARGE_COPY_CUTOFF)
                return __copy_from_user_zeroing_asm(to, from, n);
        else
                return fast_copy(to, from, n, __copy_from_user_zeroing_asm);
}

#endif /* !CHIP_HAS_COHERENT_LOCAL_CACHE() */