linux/include/asm-generic/tlb.h
/* include/asm-generic/tlb.h
 *
 *      Generic TLB shootdown code
 *
 * Copyright 2001 Red Hat, Inc.
 * Based on code from mm/memory.c Copyright Linus Torvalds and others.
 *
 * Copyright 2011 Red Hat, Inc., Peter Zijlstra
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#ifndef _ASM_GENERIC__TLB_H
#define _ASM_GENERIC__TLB_H

#include <linux/mmu_notifier.h>
#include <linux/swap.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>

#ifdef CONFIG_MMU

#ifdef CONFIG_HAVE_RCU_TABLE_FREE
/*
 * Semi RCU freeing of the page directories.
 *
 * This is needed by some architectures to implement software pagetable walkers.
 *
 * gup_fast() and other software pagetable walkers do a lockless page-table
 * walk and therefore need some synchronization with the freeing of the page
 * directories. The chosen means to accomplish that is by disabling IRQs over
 * the walk.
 *
 * Architectures that use IPIs to flush TLBs will then automagically DTRT,
 * since we unlink the page, flush TLBs, free the page. Since the disabling of
 * IRQs delays the completion of the TLB flush we can never observe an already
 * freed page.
 *
 * Architectures that do not have this (PPC) need to delay the freeing by some
 * other means; this is that means.
 *
 * What we do is batch the freed directory pages (tables) and RCU free them.
 * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
 * holds off grace periods.
 *
 * However, in order to batch these pages we need to allocate storage; this
 * allocation is deep inside the MM code and can thus easily fail on memory
 * pressure. To guarantee progress we fall back to single table freeing, see
 * the implementation of tlb_remove_table_one().
 */
struct mmu_table_batch {
        struct rcu_head         rcu;
        unsigned int            nr;
        void                    *tables[0];
};

#define MAX_TABLE_BATCH         \
        ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))

extern void tlb_table_flush(struct mmu_gather *tlb);
extern void tlb_remove_table(struct mmu_gather *tlb, void *table);

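/*
 * Illustrative only (not part of the original header): a minimal sketch,
 * assuming an architecture that selects CONFIG_HAVE_RCU_TABLE_FREE and is
 * content to pass the page-table page itself as the opaque token.  Such an
 * architecture would route its table frees through tlb_remove_table() so
 * they are batched and freed only after an RCU-sched grace period:
 *
 *      static inline void arch_free_pte_table(struct mmu_gather *tlb,
 *                                             struct page *table_page)
 *      {
 *              tlb_remove_table(tlb, page_address(table_page));
 *      }
 *
 * arch_free_pte_table() is a hypothetical helper for this sketch; real users
 * (e.g. powerpc, s390) encode more information in the token and unpack it in
 * their __tlb_remove_table() callback.
 */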
#endif

/*
 * If we can't allocate a page to make a big batch of page pointers
 * to work on, then just handle a few from the on-stack structure.
 */
#define MMU_GATHER_BUNDLE       8

struct mmu_gather_batch {
        struct mmu_gather_batch *next;
        unsigned int            nr;
        unsigned int            max;
        struct page             *pages[0];
};

#define MAX_GATHER_BATCH        \
        ((PAGE_SIZE - sizeof(struct mmu_gather_batch)) / sizeof(void *))

/*
 * Limit the maximum number of mmu_gather batches to reduce the risk of soft
 * lockups for non-preemptible kernels on huge machines when a lot of memory
 * is zapped during unmapping.
 * 10K pages freed at once should be safe even without a preemption point.
 */
#define MAX_GATHER_BATCH_COUNT  (10000UL/MAX_GATHER_BATCH)
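
/*
 * Worked example (illustrative, assuming 4 KiB pages and 64-bit pointers):
 * sizeof(struct mmu_gather_batch) is 16 bytes, so MAX_GATHER_BATCH is
 * (4096 - 16) / 8 = 510 page pointers per batch page, and
 * MAX_GATHER_BATCH_COUNT is 10000 / 510 = 19 batches.  That is roughly
 * 19 * 510 = 9690 gathered pages before __tlb_remove_page_size() refuses
 * to grow the batch list and forces the caller to flush.
 */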

/* struct mmu_gather is an opaque type used by the mm code for passing around
 * any data needed by arch specific code for tlb_remove_page.
 */
struct mmu_gather {
        struct mm_struct        *mm;
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
        struct mmu_table_batch  *batch;
#endif
        unsigned long           start;
        unsigned long           end;
        /*
         * we are in the middle of an operation to clear
         * a full mm and can make some optimizations
         */
        unsigned int            fullmm : 1;

        /*
         * we have performed an operation which
         * requires a complete flush of the tlb
         */
        unsigned int            need_flush_all : 1;

        /*
         * we have removed page directories
         */
        unsigned int            freed_tables : 1;

        /*
         * at which levels have we cleared entries?
         */
        unsigned int            cleared_ptes : 1;
        unsigned int            cleared_pmds : 1;
        unsigned int            cleared_puds : 1;
        unsigned int            cleared_p4ds : 1;

        struct mmu_gather_batch *active;
        struct mmu_gather_batch local;
        struct page             *__pages[MMU_GATHER_BUNDLE];
        unsigned int            batch_count;
        int page_size;
};

#define HAVE_GENERIC_MMU_GATHER

void arch_tlb_gather_mmu(struct mmu_gather *tlb,
        struct mm_struct *mm, unsigned long start, unsigned long end);
void tlb_flush_mmu(struct mmu_gather *tlb);
void arch_tlb_finish_mmu(struct mmu_gather *tlb,
                         unsigned long start, unsigned long end, bool force);
void tlb_flush_mmu_free(struct mmu_gather *tlb);
extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
                                   int page_size);
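
/*
 * Illustrative only: a minimal sketch of the intended calling sequence for a
 * range unmap, assuming the tlb_gather_mmu()/tlb_finish_mmu() wrappers
 * (declared in linux/mm.h) that drive the arch_tlb_*() hooks above:
 *
 *      struct mmu_gather tlb;
 *
 *      tlb_gather_mmu(&tlb, mm, start, end);
 *      for each present pte in [start, end) {
 *              ptep_get_and_clear(...);
 *              tlb_remove_tlb_entry(&tlb, pte, addr);
 *              if (__tlb_remove_page(&tlb, page))
 *                      tlb_flush_mmu(&tlb);    // batch full: flush + free now
 *      }
 *      tlb_finish_mmu(&tlb, start, end);       // final flush + free
 *
 * The pseudo-loop is a simplification of zap_pte_range(); the real code also
 * handles dirty/young bits, rmap accounting and page-size changes.
 */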

static inline void __tlb_adjust_range(struct mmu_gather *tlb,
                                      unsigned long address,
                                      unsigned int range_size)
{
        tlb->start = min(tlb->start, address);
        tlb->end = max(tlb->end, address + range_size);
}

static inline void __tlb_reset_range(struct mmu_gather *tlb)
{
        if (tlb->fullmm) {
                tlb->start = tlb->end = ~0;
        } else {
                tlb->start = TASK_SIZE;
                tlb->end = 0;
        }
        tlb->freed_tables = 0;
        tlb->cleared_ptes = 0;
        tlb->cleared_pmds = 0;
        tlb->cleared_puds = 0;
        tlb->cleared_p4ds = 0;
}
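
/*
 * Worked example (illustrative): after __tlb_reset_range() on a non-fullmm
 * gather, start == TASK_SIZE and end == 0, i.e. an empty range.  Clearing a
 * PTE at 0x1000 and another at 0x5000 then accumulates as
 *
 *      __tlb_adjust_range(tlb, 0x1000, PAGE_SIZE);  // start = 0x1000, end = 0x2000
 *      __tlb_adjust_range(tlb, 0x5000, PAGE_SIZE);  // start = 0x1000, end = 0x6000
 *
 * so the eventual tlb_flush() covers [0x1000, 0x6000) in one go (assuming
 * 4 KiB PAGE_SIZE).
 */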

static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
{
        if (!tlb->end)
                return;

        tlb_flush(tlb);
        mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end);
        __tlb_reset_range(tlb);
}

static inline void tlb_remove_page_size(struct mmu_gather *tlb,
                                        struct page *page, int page_size)
{
        if (__tlb_remove_page_size(tlb, page, page_size))
                tlb_flush_mmu(tlb);
}

static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
{
        return __tlb_remove_page_size(tlb, page, PAGE_SIZE);
}

/* tlb_remove_page
 *      Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
 *      required.
 */
static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
{
        return tlb_remove_page_size(tlb, page, PAGE_SIZE);
}

#ifndef tlb_remove_check_page_size_change
#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
                                                     unsigned int page_size)
{
        /*
         * We don't care about the page size change itself; just update the
         * mmu_gather page size here so that debug checks don't throw a
         * false warning.
         */
#ifdef CONFIG_DEBUG_VM
        tlb->page_size = page_size;
#endif
}
#endif

static inline unsigned long tlb_get_unmap_shift(struct mmu_gather *tlb)
{
        if (tlb->cleared_ptes)
                return PAGE_SHIFT;
        if (tlb->cleared_pmds)
                return PMD_SHIFT;
        if (tlb->cleared_puds)
                return PUD_SHIFT;
        if (tlb->cleared_p4ds)
                return P4D_SHIFT;

        return PAGE_SHIFT;
}

static inline unsigned long tlb_get_unmap_size(struct mmu_gather *tlb)
{
        return 1UL << tlb_get_unmap_shift(tlb);
}
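
/*
 * Worked example (illustrative): if only cleared_pmds is set, e.g. after
 * zapping a 2 MiB transparent huge page on x86-64 with 4 KiB base pages,
 * tlb_get_unmap_shift() returns PMD_SHIFT (21) and tlb_get_unmap_size()
 * returns 2 MiB, which an architecture's tlb_flush() can use to pick a
 * coarser invalidation granule.
 */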

/*
 * For tlb vma handling, we can optimise these away when we're doing a
 * full MM flush.  When we're doing a munmap, the vmas are adjusted to
 * only cover the region to be torn down.
 */
#ifndef tlb_start_vma
#define tlb_start_vma(tlb, vma) do { } while (0)
#endif

#define __tlb_end_vma(tlb, vma)                                 \
        do {                                                    \
                if (!tlb->fullmm)                               \
                        tlb_flush_mmu_tlbonly(tlb);             \
        } while (0)

#ifndef tlb_end_vma
#define tlb_end_vma     __tlb_end_vma
#endif

#ifndef __tlb_remove_tlb_entry
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#endif

/**
 * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
 *
 * Record the fact that ptes were really unmapped by updating the range,
 * so we can later optimise away the tlb invalidate.  This helps when
 * userspace is unmapping already-unmapped pages, which happens quite a lot.
 */
#define tlb_remove_tlb_entry(tlb, ptep, address)                \
        do {                                                    \
                __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
                tlb->cleared_ptes = 1;                          \
                __tlb_remove_tlb_entry(tlb, ptep, address);     \
        } while (0)
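
/*
 * Illustrative only: __tlb_remove_tlb_entry() is a hook an architecture can
 * define in its asm/tlb.h before including this header.  A hypothetical arch
 * that prefers per-page invalidation over a single ranged flush could, for
 * example, stash each cleared address:
 *
 *      #define __tlb_remove_tlb_entry(tlb, ptep, address)      \
 *              arch_queue_tlb_inval_page((tlb)->mm, (address))
 *
 * arch_queue_tlb_inval_page() is made up for this sketch; the generic macro
 * above only requires the hook to expand to a statement.
 */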

#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address)        \
        do {                                                    \
                unsigned long _sz = huge_page_size(h);          \
                __tlb_adjust_range(tlb, address, _sz);          \
                if (_sz == PMD_SIZE)                            \
                        tlb->cleared_pmds = 1;                  \
                else if (_sz == PUD_SIZE)                       \
                        tlb->cleared_puds = 1;                  \
                __tlb_remove_tlb_entry(tlb, ptep, address);     \
        } while (0)

/**
 * tlb_remove_pmd_tlb_entry - remember a pmd mapping for later tlb invalidation
 * This is a nop so far, because only x86 needs it.
 */
#ifndef __tlb_remove_pmd_tlb_entry
#define __tlb_remove_pmd_tlb_entry(tlb, pmdp, address) do {} while (0)
#endif

#define tlb_remove_pmd_tlb_entry(tlb, pmdp, address)                    \
        do {                                                            \
                __tlb_adjust_range(tlb, address, HPAGE_PMD_SIZE);       \
                tlb->cleared_pmds = 1;                                  \
                __tlb_remove_pmd_tlb_entry(tlb, pmdp, address);         \
        } while (0)

/**
 * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb
 * invalidation. This is a nop so far, because only x86 needs it.
 */
#ifndef __tlb_remove_pud_tlb_entry
#define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0)
#endif

#define tlb_remove_pud_tlb_entry(tlb, pudp, address)                    \
        do {                                                            \
                __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE);       \
                tlb->cleared_puds = 1;                                  \
                __tlb_remove_pud_tlb_entry(tlb, pudp, address);         \
        } while (0)

/*
 * For things like page table caches (i.e. caching addresses "inside" the
 * page tables, like x86 does), for legacy reasons, flushing an
 * individual page had better flush the page table caches behind it. This
 * is definitely how x86 works, for example. And if you have an
 * architected non-legacy page table cache (which I'm not aware of
 * anybody actually doing), you're going to have some architecturally
 * explicit flushing for that, likely *separate* from a regular TLB entry
 * flush, and thus you'd need more than just some range expansion.
 *
 * So if we ever find an architecture that would want something that odd,
 * I think it is up to that architecture to do its own odd thing, not
 * cause pain for others:
 * http://lkml.kernel.org/r/CA+55aFzBggoXtNXQeng5d_mRoDnaMBE5Y+URs+PHR67nUpMtaw@mail.gmail.com
 *
 * For now, w.r.t. the page table cache, mark the range_size as PAGE_SIZE.
 */

#ifndef pte_free_tlb
#define pte_free_tlb(tlb, ptep, address)                        \
        do {                                                    \
                __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
                tlb->freed_tables = 1;                          \
                tlb->cleared_pmds = 1;                          \
                __pte_free_tlb(tlb, ptep, address);             \
        } while (0)
#endif
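
/*
 * Illustrative only: a minimal sketch of the arch side of the contract,
 * assuming an architecture for which freeing a PTE table is just a matter
 * of handing the table page back to the gatherer (roughly what x86 does,
 * minus its paravirt hooks):
 *
 *      #define __pte_free_tlb(tlb, pte, address)       \
 *      do {                                            \
 *              pgtable_page_dtor(pte);                 \
 *              tlb_remove_page((tlb), (pte));          \
 *      } while (0)
 *
 * The generic pte_free_tlb() wrapper above has already widened the flush
 * range and set freed_tables/cleared_pmds by the time this hook runs.
 */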

#ifndef pmd_free_tlb
#define pmd_free_tlb(tlb, pmdp, address)                        \
        do {                                                    \
                __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
                tlb->freed_tables = 1;                          \
                tlb->cleared_puds = 1;                          \
                __pmd_free_tlb(tlb, pmdp, address);             \
        } while (0)
#endif

#ifndef __ARCH_HAS_4LEVEL_HACK
#ifndef pud_free_tlb
#define pud_free_tlb(tlb, pudp, address)                        \
        do {                                                    \
                __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
                tlb->freed_tables = 1;                          \
                tlb->cleared_p4ds = 1;                          \
                __pud_free_tlb(tlb, pudp, address);             \
        } while (0)
#endif
#endif

#ifndef __ARCH_HAS_5LEVEL_HACK
#ifndef p4d_free_tlb
#define p4d_free_tlb(tlb, pudp, address)                        \
        do {                                                    \
                __tlb_adjust_range(tlb, address, PAGE_SIZE);    \
                tlb->freed_tables = 1;                          \
                __p4d_free_tlb(tlb, pudp, address);             \
        } while (0)
#endif
#endif

#endif /* CONFIG_MMU */

#define tlb_migrate_finish(mm) do {} while (0)

#endif /* _ASM_GENERIC__TLB_H */