linux/arch/sparc/mm/gup.c
/*
 * Lockless get_user_pages_fast for sparc, cribbed from powerpc
 *
 * Copyright (C) 2008 Nick Piggin
 * Copyright (C) 2008 Novell Inc.
 */

#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/pagemap.h>
#include <linux/rwsem.h>
#include <asm/pgtable.h>

/*
 * The performance-critical leaf functions are made noinline, otherwise gcc
 * inlines everything into a single function, which results in too much
 * register pressure.
 */
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
                unsigned long end, int write, struct page **pages, int *nr)
{
        unsigned long mask, result;
        pte_t *ptep;

        if (tlb_type == hypervisor) {
                result = _PAGE_PRESENT_4V|_PAGE_P_4V;
                if (write)
                        result |= _PAGE_WRITE_4V;
        } else {
                result = _PAGE_PRESENT_4U|_PAGE_P_4U;
                if (write)
                        result |= _PAGE_WRITE_4U;
        }
        mask = result | _PAGE_SPECIAL;

        ptep = pte_offset_kernel(&pmd, addr);
        do {
                struct page *page, *head;
                pte_t pte = *ptep;

                if ((pte_val(pte) & mask) != result)
                        return 0;
                VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

                /* The hugepage case is simplified on sparc64 because
                 * we encode the sub-page pfn offsets into the
                 * hugepage PTEs.  We could optimize this in the future
                 * to use page_cache_add_speculative() for the hugepage
                 * case.
                 */
                page = pte_page(pte);
                head = compound_head(page);
                if (!page_cache_get_speculative(head))
                        return 0;
                if (unlikely(pte_val(pte) != pte_val(*ptep))) {
                        put_page(head);
                        return 0;
                }
                if (head != page)
                        get_huge_page_tail(page);

                pages[*nr] = page;
                (*nr)++;
        } while (ptep++, addr += PAGE_SIZE, addr != end);

        return 1;
}

static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
                        unsigned long end, int write, struct page **pages,
                        int *nr)
{
        struct page *head, *page, *tail;
        int refs;

        if (!(pmd_val(pmd) & _PAGE_VALID))
                return 0;

        if (write && !pmd_write(pmd))
                return 0;

        refs = 0;
        head = pmd_page(pmd);
        page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
        tail = page;
        do {
                VM_BUG_ON(compound_head(page) != head);
                pages[*nr] = page;
                (*nr)++;
                page++;
                refs++;
        } while (addr += PAGE_SIZE, addr != end);

        if (!page_cache_add_speculative(head, refs)) {
                *nr -= refs;
                return 0;
        }

        if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
                *nr -= refs;
                while (refs--)
                        put_page(head);
                return 0;
        }

        /* Any tail pages need their mapcount reference taken before we
         * return.
         */
        while (refs--) {
                if (PageTail(tail))
                        get_huge_page_tail(tail);
                tail++;
        }

        return 1;
}

static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
                int write, struct page **pages, int *nr)
{
        unsigned long next;
        pmd_t *pmdp;

        pmdp = pmd_offset(&pud, addr);
        do {
                pmd_t pmd = *pmdp;

                next = pmd_addr_end(addr, end);
                if (pmd_none(pmd) || pmd_trans_splitting(pmd))
                        return 0;
                if (unlikely(pmd_large(pmd))) {
                        if (!gup_huge_pmd(pmdp, pmd, addr, next,
                                          write, pages, nr))
                                return 0;
                } else if (!gup_pte_range(pmd, addr, next, write,
                                          pages, nr))
                        return 0;
        } while (pmdp++, addr = next, addr != end);

        return 1;
}

static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
                int write, struct page **pages, int *nr)
{
        unsigned long next;
        pud_t *pudp;

        pudp = pud_offset(&pgd, addr);
        do {
                pud_t pud = *pudp;

                next = pud_addr_end(addr, end);
                if (pud_none(pud))
                        return 0;
                if (!gup_pmd_range(pud, addr, next, write, pages, nr))
                        return 0;
        } while (pudp++, addr = next, addr != end);

        return 1;
}

int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
                          struct page **pages)
{
        struct mm_struct *mm = current->mm;
        unsigned long addr, len, end;
        unsigned long next, flags;
        pgd_t *pgdp;
        int nr = 0;

        start &= PAGE_MASK;
        addr = start;
        len = (unsigned long) nr_pages << PAGE_SHIFT;
        end = start + len;

        local_irq_save(flags);
        pgdp = pgd_offset(mm, addr);
        do {
                pgd_t pgd = *pgdp;

                next = pgd_addr_end(addr, end);
                if (pgd_none(pgd))
                        break;
                if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
                        break;
        } while (pgdp++, addr = next, addr != end);
        local_irq_restore(flags);

        return nr;
}
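
/*
 * Illustrative sketch, not part of the original file: __get_user_pages_fast()
 * above never sleeps and never falls back to the slow path, so it is usable
 * from contexts that cannot take mm->mmap_sem; callers must tolerate a short
 * count and fall back themselves. The helper name below is hypothetical.
 */
static struct page *example_pin_page_atomic(unsigned long uaddr, int write)
{
        struct page *page;

        /* Try to pin exactly one page without faulting or sleeping. */
        if (__get_user_pages_fast(uaddr, 1, write, &page) != 1)
                return NULL;    /* caller falls back to get_user_pages_fast() */

        return page;            /* caller must put_page() when done */
}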

int get_user_pages_fast(unsigned long start, int nr_pages, int write,
                        struct page **pages)
{
        struct mm_struct *mm = current->mm;
        unsigned long addr, len, end;
        unsigned long next;
        pgd_t *pgdp;
        int nr = 0;

        start &= PAGE_MASK;
        addr = start;
        len = (unsigned long) nr_pages << PAGE_SHIFT;
        end = start + len;

        /*
         * XXX: batch / limit 'nr', to avoid large irq off latency;
         * needs some instrumenting to determine the common sizes used by
         * important workloads (eg. DB2), and whether limiting the batch size
         * will decrease performance.
         *
         * It seems like we're in the clear for the moment. Direct-IO is
         * the main user that batches up lots of get_user_pages, and even
         * it is limited to 64-at-a-time, which is not so many.
         */
        /*
         * This doesn't prevent pagetable teardown, but does prevent
         * the pagetables from being freed on sparc.
         *
         * So long as we atomically load page table pointers versus teardown,
         * we can follow the address down to the page and take a ref on it.
         * (An illustrative caller sketch follows this function.)
         */
        local_irq_disable();

        pgdp = pgd_offset(mm, addr);
        do {
                pgd_t pgd = *pgdp;

                next = pgd_addr_end(addr, end);
                if (pgd_none(pgd))
                        goto slow;
                if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
                        goto slow;
        } while (pgdp++, addr = next, addr != end);

        local_irq_enable();

        VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
        return nr;

        {
                int ret;

slow:
                local_irq_enable();

                /* Try to get the remaining pages with get_user_pages */
                start += nr << PAGE_SHIFT;
                pages += nr;

                ret = get_user_pages_unlocked(current, mm, start,
                        (end - start) >> PAGE_SHIFT, write, 0, pages);

                /* Have to be a bit careful with return values: if the fast
                 * path pinned some pages but the slow path then failed, we
                 * still report the pages already pinned rather than the error.
                 */
                if (nr > 0) {
                        if (ret < 0)
                                ret = nr;
                        else
                                ret += nr;
                }

                return ret;
        }
}
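
/*
 * Illustrative usage sketch, not part of the original file: a typical
 * caller of get_user_pages_fast() pins a user buffer, uses the pages,
 * then drops each reference with put_page(). The helper name below is
 * hypothetical, and kmalloc_array() would need <linux/slab.h>.
 */
static int example_pin_user_buffer(unsigned long uaddr, size_t len, int write)
{
        unsigned long first = uaddr & PAGE_MASK;
        unsigned long last = PAGE_ALIGN(uaddr + len);
        int nr_pages = (last - first) >> PAGE_SHIFT;
        struct page **pages;
        int pinned, i;

        pages = kmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
        if (!pages)
                return -ENOMEM;

        /* May fault pages in and sleep; pins up to nr_pages pages. */
        pinned = get_user_pages_fast(first, nr_pages, write, pages);
        if (pinned < 0) {
                kfree(pages);
                return pinned;
        }

        /* ... access the pinned pages here, e.g. via kmap() ... */

        for (i = 0; i < pinned; i++)
                put_page(pages[i]);
        kfree(pages);

        return pinned == nr_pages ? 0 : -EFAULT;
}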