linux/arch/sparc/mm/gup.c
/*
 * Lockless get_user_pages_fast for sparc, cribbed from powerpc
 *
 * Copyright (C) 2008 Nick Piggin
 * Copyright (C) 2008 Novell Inc.
 */

#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/vmstat.h>
#include <linux/pagemap.h>
#include <linux/rwsem.h>
#include <asm/pgtable.h>

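/*
 * The walk below runs with interrupts disabled.  As the comment in
 * get_user_pages_fast() explains, that does not stop page table
 * teardown, but it does keep the page tables from being freed on
 * sparc, so the pointers we load remain safe to dereference.  Page
 * references are taken speculatively and each PTE or PMD is rechecked
 * afterwards; any race sends us back to the slow path.
 */
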
/*
 * The performance critical leaf functions are made noinline otherwise gcc
 * inlines everything into a single function which results in too much
 * register pressure.
 */
static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
		unsigned long end, int write, struct page **pages, int *nr)
{
	unsigned long mask, result;
	pte_t *ptep;

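	/* Build the set of PTE bits a mapping must carry for the
	 * lockless walk to accept it, using the sun4v bit layout on
	 * hypervisor chips and the sun4u layout otherwise.  Folding
	 * _PAGE_SPECIAL into the mask but not into the expected value
	 * means pte_special() mappings never match and take the slow
	 * path instead.
	 */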
	if (tlb_type == hypervisor) {
		result = _PAGE_PRESENT_4V|_PAGE_P_4V;
		if (write)
			result |= _PAGE_WRITE_4V;
	} else {
		result = _PAGE_PRESENT_4U|_PAGE_P_4U;
		if (write)
			result |= _PAGE_WRITE_4U;
	}
	mask = result | _PAGE_SPECIAL;

	ptep = pte_offset_kernel(&pmd, addr);
	do {
		struct page *page, *head;
		pte_t pte = *ptep;

		if ((pte_val(pte) & mask) != result)
			return 0;
		VM_BUG_ON(!pfn_valid(pte_pfn(pte)));

		/* The hugepage case is simplified on sparc64 because
		 * we encode the sub-page pfn offsets into the
		 * hugepage PTEs.  We could optimize this in the future
		 * to use page_cache_add_speculative() for the hugepage case.
		 */
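		/* Grab a speculative reference on the (possibly
		 * compound) head page, then recheck the PTE.  If it
		 * changed while we were taking the reference, the page
		 * may already be on its way out, so drop the reference
		 * and give up on the fast path.
		 */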
		page = pte_page(pte);
		head = compound_head(page);
		if (!page_cache_get_speculative(head))
			return 0;
		if (unlikely(pte_val(pte) != pte_val(*ptep))) {
			put_page(head);
			return 0;
		}
		if (head != page)
			get_huge_page_tail(page);

		pages[*nr] = page;
		(*nr)++;
	} while (ptep++, addr += PAGE_SIZE, addr != end);

	return 1;
}

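/* Walk a huge PMD mapping: record each base page covered by [addr, end)
 * and take all of the references on the head page in one speculative
 * batch.
 */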
static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
			unsigned long end, int write, struct page **pages,
			int *nr)
{
	struct page *head, *page, *tail;
	u32 mask;
	int refs;

	mask = PMD_HUGE_PRESENT;
	if (write)
		mask |= PMD_HUGE_WRITE;
	if ((pmd_val(pmd) & mask) != mask)
		return 0;

	refs = 0;
	head = pmd_page(pmd);
	page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	tail = page;
	do {
		VM_BUG_ON(compound_head(page) != head);
		pages[*nr] = page;
		(*nr)++;
		page++;
		refs++;
	} while (addr += PAGE_SIZE, addr != end);

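	/* Take all 'refs' references on the head page at once.  If the
	 * page is being freed concurrently, back out the entries we just
	 * recorded.
	 */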
	if (!page_cache_add_speculative(head, refs)) {
		*nr -= refs;
		return 0;
	}

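	/* The PMD may have been split or torn down while we were taking
	 * the references; recheck it and undo everything if it changed.
	 */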
	if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
		*nr -= refs;
		while (refs--)
			put_page(head);
		return 0;
	}

	/* Any tail pages need their mapcount reference taken before we
	 * return.
	 */
	while (refs--) {
		if (PageTail(tail))
			get_huge_page_tail(tail);
		tail++;
	}

	return 1;
}

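/* Walk the PMDs covering [addr, end).  A PMD in the middle of a THP
 * split cannot be handled locklessly, so pmd_trans_splitting() bails
 * out to the slow path just like pmd_none() does.
 */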
static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end,
		int write, struct page **pages, int *nr)
{
	unsigned long next;
	pmd_t *pmdp;

	pmdp = pmd_offset(&pud, addr);
	do {
		pmd_t pmd = *pmdp;

		next = pmd_addr_end(addr, end);
		if (pmd_none(pmd) || pmd_trans_splitting(pmd))
			return 0;
		if (unlikely(pmd_large(pmd))) {
			if (!gup_huge_pmd(pmdp, pmd, addr, next,
					  write, pages, nr))
				return 0;
		} else if (!gup_pte_range(pmd, addr, next, write,
					  pages, nr))
			return 0;
	} while (pmdp++, addr = next, addr != end);

	return 1;
}

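/* Walk the PUDs covering [addr, end), descending into the PMDs below
 * each present entry.
 */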
static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
		int write, struct page **pages, int *nr)
{
	unsigned long next;
	pud_t *pudp;

	pudp = pud_offset(&pgd, addr);
	do {
		pud_t pud = *pudp;

		next = pud_addr_end(addr, end);
		if (pud_none(pud))
			return 0;
		if (!gup_pmd_range(pud, addr, next, write, pages, nr))
			return 0;
	} while (pudp++, addr = next, addr != end);

	return 1;
}

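/* Pin up to nr_pages user pages starting at 'start' without taking
 * mmap_sem, falling back to the regular get_user_pages() for whatever
 * the lockless walk cannot handle.  Returns the number of pages pinned.
 */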
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
			struct page **pages)
{
	struct mm_struct *mm = current->mm;
	unsigned long addr, len, end;
	unsigned long next;
	pgd_t *pgdp;
	int nr = 0;

	start &= PAGE_MASK;
	addr = start;
	len = (unsigned long) nr_pages << PAGE_SHIFT;
	end = start + len;

	/*
	 * XXX: batch / limit 'nr', to avoid large irq off latency
	 * needs some instrumenting to determine the common sizes used by
	 * important workloads (eg. DB2), and whether limiting the batch size
	 * will decrease performance.
	 *
	 * It seems like we're in the clear for the moment. Direct-IO is
	 * the main guy that batches up lots of get_user_pages, and even
	 * they are limited to 64-at-a-time which is not so many.
	 */
	/*
	 * This doesn't prevent pagetable teardown, but does prevent
	 * the pagetables from being freed on sparc.
	 *
	 * So long as we atomically load page table pointers versus teardown,
	 * we can follow the address down to the page and take a ref on it.
	 */
	local_irq_disable();

	pgdp = pgd_offset(mm, addr);
	do {
		pgd_t pgd = *pgdp;

		next = pgd_addr_end(addr, end);
		if (pgd_none(pgd))
			goto slow;
		if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
			goto slow;
	} while (pgdp++, addr = next, addr != end);

	local_irq_enable();

	VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT);
	return nr;

	{
		int ret;

slow:
		local_irq_enable();

		/* Try to get the remaining pages with get_user_pages */
		start += nr << PAGE_SHIFT;
		pages += nr;

		down_read(&mm->mmap_sem);
		ret = get_user_pages(current, mm, start,
			(end - start) >> PAGE_SHIFT, write, 0, pages, NULL);
		up_read(&mm->mmap_sem);

		/* Have to be a bit careful with return values */
		if (nr > 0) {
			if (ret < 0)
				ret = nr;
			else
				ret += nr;
		}

		return ret;
	}
}
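
/* For illustration, a minimal sketch of how a caller might use
 * get_user_pages_fast() to pin a user buffer and release it again.
 * The function below is hypothetical (it is not part of this file or
 * of any kernel API) and is guarded out of compilation.
 */
#if 0
static int example_pin_user_buffer(unsigned long uaddr, int nr_pages,
				   struct page **pages)
{
	int i, got;

	/* Pin for write access; returns how many pages were pinned,
	 * or a negative errno if none could be pinned.
	 */
	got = get_user_pages_fast(uaddr, nr_pages, 1, pages);
	if (got < 0)
		return got;

	/* ... perform I/O on pages[0..got) here ... */

	/* Drop the references that get_user_pages_fast() took. */
	for (i = 0; i < got; i++)
		put_page(pages[i]);

	return got == nr_pages ? 0 : -EFAULT;
}
#endif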