/* include/asm-generic/tlb.h
 *
 *      Generic TLB shootdown code
 *
 * Copyright 2001 Red Hat, Inc.
 * Based on code from mm/memory.c Copyright Linus Torvalds and others.
 *
 * Copyright 2011 Red Hat, Inc., Peter Zijlstra
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#ifndef _ASM_GENERIC__TLB_H
#define _ASM_GENERIC__TLB_H

#include <linux/mmu_notifier.h>
#include <linux/swap.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>

/*
 * Blindly accessing user memory from NMI context can be dangerous
 * if we're in the middle of switching the current user task or switching
 * the loaded mm.
 */
#ifndef nmi_uaccess_okay
# define nmi_uaccess_okay() true
#endif

#ifdef CONFIG_MMU

#ifdef CONFIG_HAVE_RCU_TABLE_FREE
/*
 * Semi RCU freeing of the page directories.
 *
 * This is needed by some architectures to implement software pagetable walkers.
 *
 * gup_fast() and other software pagetable walkers do a lockless page-table
 * walk and therefore need some synchronization with the freeing of the page
 * directories. The chosen means to accomplish that is by disabling IRQs over
 * the walk.
 *
 * Architectures that use IPIs to flush TLBs will then automagically DTRT,
 * since we unlink the page, flush TLBs, free the page. Since the disabling of
 * IRQs delays the completion of the TLB flush we can never observe an already
 * freed page.
 *
 * Architectures that do not have this (PPC) need to delay the freeing by some
 * other means; this is that means.
 *
 * What we do is batch the freed directory pages (tables) and RCU free them.
 * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
 * holds off grace periods.
 *
 * However, in order to batch these pages we need to allocate storage; this
 * allocation is deep inside the MM code and can thus easily fail on memory
 * pressure. To guarantee progress we fall back to single table freeing; see
 * the implementation of tlb_remove_table_one().
 */
struct mmu_table_batch {
        struct rcu_head         rcu;
        unsigned int            nr;
        void                    *tables[0];
};

#define MAX_TABLE_BATCH         \
        ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))

extern void tlb_table_flush(struct mmu_gather *tlb);
extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
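
/*
 * Illustrative sketch (an assumption, not part of this header): an
 * architecture that selects CONFIG_HAVE_RCU_TABLE_FREE typically builds its
 * page-table freeing hooks on top of tlb_remove_table(), so the table page
 * is queued in the mmu_table_batch above and only freed after an RCU grace
 * period (or via the tlb_remove_table_one() fallback).  The function name
 * below is hypothetical, and pgtable_t is assumed to be a struct page
 * pointer as on most architectures; the arch's __tlb_remove_table()
 * callback decides how the queued cookie is eventually freed.
 */
#if 0   /* illustrative sketch only, not compiled */
static inline void example_pte_free_tlb(struct mmu_gather *tlb,
                                        pgtable_t pte, unsigned long addr)
{
        /* Undo the pgtable_page_ctor() accounting before queueing the page. */
        pgtable_page_dtor(pte);
        /* Defer the actual free until it is safe w.r.t. lockless walkers. */
        tlb_remove_table(tlb, pte);
}
#endif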

#endif

/*
 * If we can't allocate a page to make a big batch of page pointers
 * to work on, then just handle a few from the on-stack structure.
 */
#define MMU_GATHER_BUNDLE       8

struct mmu_gather_batch {
        struct mmu_gather_batch *next;
        unsigned int            nr;
        unsigned int            max;
        struct page             *pages[0];
};

#define MAX_GATHER_BATCH        \
        ((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))

/*
 * Limit the maximum number of mmu_gather batches to reduce the risk of soft
 * lockups for non-preemptible kernels on huge machines when a lot of memory
 * is zapped during unmapping.
 * 10K pages freed at once should be safe even without a preemption point.
 */
#define MAX_GATHER_BATCH_COUNT  (10000UL/MAX_GATHER_BATCH)
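
/*
 * Worked example (an illustration only; the exact numbers depend on the
 * configuration): with 4 KiB pages and 8-byte pointers on a typical 64-bit
 * build, a batch page holds MAX_GATHER_BATCH = (4096 - 16) / 8 = 510 page
 * pointers, so MAX_GATHER_BATCH_COUNT = 10000 / 510 = 19 batches, i.e.
 * roughly 9700 pages batched before we force a flush cycle.
 */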

/* struct mmu_gather is an opaque type used by the mm code for passing around
 * any data needed by arch-specific code for tlb_remove_page.
 */
struct mmu_gather {
        struct mm_struct        *mm;
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
        struct mmu_table_batch  *batch;
#endif
        unsigned long           start;
        unsigned long           end;
        /* we are in the middle of an operation to clear
         * a full mm and can make some optimizations */
        unsigned int            fullmm : 1,
        /* we have performed an operation which
         * requires a complete flush of the tlb */
                                need_flush_all : 1;

        struct mmu_gather_batch *active;
        struct mmu_gather_batch local;
        struct page             *__pages[MMU_GATHER_BUNDLE];
        unsigned int            batch_count;
        int page_size;
};

#define HAVE_GENERIC_MMU_GATHER

void arch_tlb_gather_mmu(struct mmu_gather *tlb,
        struct mm_struct *mm, unsigned long start, unsigned long end);
void tlb_flush_mmu(struct mmu_gather *tlb);
void arch_tlb_finish_mmu(struct mmu_gather *tlb,
                         unsigned long start, unsigned long end, bool force);
extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
                                   int page_size);
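
/*
 * Illustrative sketch (an assumption, not part of this header): the usual
 * way callers drive the generic mmu_gather, modelled on unmap_region() in
 * mm/mmap.c.  tlb_gather_mmu()/tlb_finish_mmu() are the wrappers declared
 * in linux/mm.h that sit on top of the arch_tlb_*() hooks above; the
 * function name with the example_ prefix is hypothetical.
 */
#if 0   /* illustrative sketch only, not compiled */
static void example_zap_region(struct mm_struct *mm,
                               struct vm_area_struct *vma,
                               unsigned long start, unsigned long end)
{
        struct mmu_gather tlb;

        tlb_gather_mmu(&tlb, mm, start, end);
        /* Clear PTEs, accumulate the flush range and batch the pages. */
        unmap_vmas(&tlb, vma, start, end);
        /* Queue the now-empty page tables for freeing as well. */
        free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, end);
        /* Flush the TLBs once, then actually free everything batched. */
        tlb_finish_mmu(&tlb, start, end);
}
#endif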

static inline void __tlb_adjust_range(struct mmu_gather *tlb,
                                      unsigned long address,
                                      unsigned int range_size)
{
        tlb->start = min(tlb->start, address);
        tlb->end = max(tlb->end, address + range_size);
}

static inline void __tlb_reset_range(struct mmu_gather *tlb)
{
        if (tlb->fullmm) {
                tlb->start = tlb->end = ~0;
        } else {
                tlb->start = TASK_SIZE;
                tlb->end = 0;
        }
}

static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
{
        if (!tlb->end)
                return;

        tlb_flush(tlb);
        mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
        __tlb_reset_range(tlb);
}

static inline void tlb_remove_page_size(struct mmu_gather *tlb,
                                        struct page *page, int page_size)
{
        if (__tlb_remove_page_size(tlb, page, page_size))
                tlb_flush_mmu(tlb);
}

static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
{
        return __tlb_remove_page_size(tlb, page, PAGE_SIZE);
}

/* tlb_remove_page
 *      Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
 *      required.
 */
static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
{
        return tlb_remove_page_size(tlb, page, PAGE_SIZE);
}

#ifndef tlb_remove_check_page_size_change
#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
                                                     unsigned int page_size)
{
        /*
         * We don't care about the page size change itself; just update
         * the mmu_gather page size here so that the debug checks don't
         * throw a false warning.
         */
#ifdef CONFIG_DEBUG_VM
        tlb->page_size = page_size;
#endif
}
#endif

/*
 * In the case of tlb vma handling, we can optimise these away when we're
 * doing a full MM flush.  When we're doing a munmap, the vmas are adjusted
 * to only cover the region to be torn down.
 */
#ifndef tlb_start_vma
#define tlb_start_vma(tlb, vma) do { } while (0)
#endif

#define __tlb_end_vma(tlb, vma)                                 \
        do {                                                    \
                if (!tlb->fullmm)                               \
                        tlb_flush_mmu_tlbonly(tlb);             \
        } while (0)

#ifndef tlb_end_vma
#define tlb_end_vma     __tlb_end_vma
#endif
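
/*
 * Illustrative sketch (an assumption, not part of this header): how a
 * per-VMA unmap path, modelled on unmap_page_range() in mm/memory.c,
 * brackets the page-table walk with tlb_start_vma()/tlb_end_vma() so that
 * architectures tracking per-VMA state can hook in.  The function name
 * with the example_ prefix is hypothetical.
 */
#if 0   /* illustrative sketch only, not compiled */
static void example_unmap_vma(struct mmu_gather *tlb,
                              struct vm_area_struct *vma,
                              unsigned long addr, unsigned long end)
{
        tlb_start_vma(tlb, vma);
        /* ... walk pgd/p4d/pud/pmd and zap the PTEs in [addr, end) ... */
        tlb_end_vma(tlb, vma);
}
#endif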

#ifndef __tlb_remove_tlb_entry
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#endif

/**
 * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
 *
 * Record the fact that PTEs were really unmapped by updating the range,
 * so we can later optimise away the tlb invalidate.  This helps when
 * userspace is unmapping already-unmapped pages, which happens quite a lot.
 */
#define tlb_remove_tlb_entry(tlb, ptep, address)                \
        do {                                                    \
                __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
                __tlb_remove_tlb_entry(tlb, ptep, address);     \
        } while (0)
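
/*
 * Illustrative sketch (an assumption, not part of this header): a
 * simplified PTE zap loop, modelled on zap_pte_range() in mm/memory.c,
 * showing how tlb_remove_tlb_entry() and __tlb_remove_page() cooperate.
 * Reference counting, rmap and dirty/accessed handling are omitted; the
 * function name with the example_ prefix is hypothetical.
 */
#if 0   /* illustrative sketch only, not compiled */
static void example_zap_pte_range(struct mmu_gather *tlb,
                                  struct vm_area_struct *vma, pmd_t *pmd,
                                  unsigned long addr, unsigned long end)
{
        struct mm_struct *mm = vma->vm_mm;
        spinlock_t *ptl;
        pte_t *start_pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
        pte_t *pte = start_pte;

        do {
                pte_t ptent = *pte;

                if (pte_none(ptent) || !pte_present(ptent))
                        continue;

                /* Clear the PTE and extend tlb->start/end to cover it. */
                ptent = ptep_get_and_clear(mm, addr, pte);
                tlb_remove_tlb_entry(tlb, pte, addr);

                /* Batch the page; 'true' means the batch is full. */
                if (__tlb_remove_page(tlb, pte_page(ptent)))
                        break;
        } while (pte++, addr += PAGE_SIZE, addr != end);

        pte_unmap_unlock(start_pte, ptl);
        /* On a full batch the caller would tlb_flush_mmu() and retry. */
}
#endif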

#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)             \
        do {                                                         \
                __tlb_adjust_range(tlb, address, huge_page_size(h)); \
                __tlb_remove_tlb_entry(tlb, ptep, address);          \
        } while (0)

/**
 * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation
 * This is a nop so far, because only x86 needs it.
 */
#ifndef __tlb_remove_pmd_tlb_entry
#define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0)
#endif

#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)                    \
        do {                                                            \
                __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE);       \
                __tlb_remove_pmd_tlb_entry(tlb, pmdp, address);         \
        } while (0)

/**
 * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb
 * invalidation. This is a nop so far, because only x86 needs it.
 */
#ifndef __tlb_remove_pud_tlb_entry
#define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0)
#endif

#define tlb_remove_pud_tlb_entry(tlb, pudp, address)                    \
        do {                                                            \
                __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE);       \
                __tlb_remove_pud_tlb_entry(tlb, pudp, address);         \
        } while (0)

/*
 * For things like page table caches (i.e. caching addresses "inside" the
 * page tables, like x86 does), for legacy reasons, flushing an
 * individual page had better flush the page table caches behind it. This
 * is definitely how x86 works, for example. And if you have an
 * architected non-legacy page table cache (which I'm not aware of
 * anybody actually doing), you're going to have some architecturally
 * explicit flushing for that, likely *separate* from a regular TLB entry
 * flush, and thus you'd need more than just some range expansion..
 *
 * So if we ever find an architecture
 * that would want something that odd, I think it is up to that
 * architecture to do its own odd thing, not cause pain for others
 * http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com
 *
 * For now, w.r.t. the page table cache, mark the range_size as PAGE_SIZE.
 */

#ifndef pte_free_tlb
#define pte_free_tlb(tlb, ptep, address)                        \
        do {                                                    \
                __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
                __pte_free_tlb(tlb, ptep, address);             \
        } while (0)
#endif
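
/*
 * Illustrative sketch (an assumption, not part of this header): freeing a
 * PTE page through pte_free_tlb(), modelled on free_pte_range() in
 * mm/memory.c.  The table is unhooked from the pmd first, then queued so
 * it is only freed after the TLBs (and, with CONFIG_HAVE_RCU_TABLE_FREE,
 * any concurrent lockless walkers) are done with it.  The function name
 * with the example_ prefix is hypothetical.
 */
#if 0   /* illustrative sketch only, not compiled */
static void example_free_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
                                   unsigned long addr)
{
        pgtable_t token = pmd_pgtable(*pmd);

        /* Unhook the PTE page from the pmd before queueing it for freeing. */
        pmd_clear(pmd);
        pte_free_tlb(tlb, token, addr);
        mm_dec_nr_ptes(tlb->mm);
}
#endif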

#ifndef pmd_free_tlb
#define pmd_free_tlb(tlb, pmdp, address)                        \
        do {                                                    \
                __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
                __pmd_free_tlb(tlb, pmdp, address);             \
        } while (0)
#endif

#ifndef __ARCH_HAS_4LEVEL_HACK
#ifndef pud_free_tlb
#define pud_free_tlb(tlb, pudp, address)                        \
        do {                                                    \
                __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
                __pud_free_tlb(tlb, pudp, address);             \
        } while (0)
#endif
#endif

#ifndef __ARCH_HAS_5LEVEL_HACK
#ifndef p4d_free_tlb
#define p4d_free_tlb(tlb, p4dp, address)                        \
        do {                                                    \
                __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
                __p4d_free_tlb(tlb, p4dp, address);             \
        } while (0)
#endif
#endif

#endif /* CONFIG_MMU */

#define tlb_migrate_finish(mm) do {} while (0)

#endif /* _ASM_GENERIC__TLB_H */