linux/mm/mmu_gather.c
#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/mm_inline.h>
#include <linux/pagemap.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/swap.h>

#include <asm/pgalloc.h>
#include <asm/tlb.h>

#ifndef CONFIG_MMU_GATHER_NO_GATHER

static bool tlb_next_batch(struct mmu_gather *tlb)
{
        struct mmu_gather_batch *batch;

        batch = tlb->active;
        if (batch->next) {
                tlb->active = batch->next;
                return true;
        }

        if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
                return false;

        batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
        if (!batch)
                return false;

        tlb->batch_count++;
        batch->next = NULL;
        batch->nr   = 0;
        batch->max  = MAX_GATHER_BATCH;

        tlb->active->next = batch;
        tlb->active = batch;

        return true;
}
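
/*
 * For reference, the batch nodes chained by tlb_next_batch() follow the shape
 * of struct mmu_gather_batch in include/asm-generic/tlb.h; a rough sketch
 * (field order and the flexible-array spelling may differ between kernel
 * versions):
 *
 *      struct mmu_gather_batch {
 *              struct mmu_gather_batch *next;    // singly-linked list of batches
 *              unsigned int            nr;       // pages currently stored
 *              unsigned int            max;      // capacity of pages[]
 *              struct page             *pages[]; // gathered pages to free later
 *      };
 *
 * Each extra node occupies exactly one page, so MAX_GATHER_BATCH is simply the
 * number of page pointers that fit in a page after the header above.
 */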

static void tlb_batch_pages_flush(struct mmu_gather *tlb)
{
        struct mmu_gather_batch *batch;

        for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
                struct page **pages = batch->pages;

                do {
                        /*
                         * Limit the number of pages freed per iteration when
                         * PAGE_SIZE > 4K, so a huge batch doesn't hold off
                         * rescheduling for too long.
                         */
                        unsigned int nr = min(512U, batch->nr);

                        free_pages_and_swap_cache(pages, nr);
                        pages += nr;
                        batch->nr -= nr;

                        cond_resched();
                } while (batch->nr);
        }
        tlb->active = &tlb->local;
}

static void tlb_batch_list_free(struct mmu_gather *tlb)
{
        struct mmu_gather_batch *batch, *next;

        for (batch = tlb->local.next; batch; batch = next) {
                next = batch->next;
                free_pages((unsigned long)batch, 0);
        }
        tlb->local.next = NULL;
}

bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size)
{
        struct mmu_gather_batch *batch;

        VM_BUG_ON(!tlb->end);

#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
        VM_WARN_ON(tlb->page_size != page_size);
#endif

        batch = tlb->active;
        /*
         * Add the page and check if we are full. If so,
         * force a flush.
         */
        batch->pages[batch->nr++] = page;
        if (batch->nr == batch->max) {
                if (!tlb_next_batch(tlb))
                        return true;
                batch = tlb->active;
        }
        VM_BUG_ON_PAGE(batch->nr > batch->max, page);

        return false;
}
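
/*
 * A minimal sketch of how callers consume the return value above: the
 * tlb_remove_page_size() wrapper in include/asm-generic/tlb.h does roughly
 * the following (simplified, not the verbatim implementation):
 *
 *      static inline void tlb_remove_page_size(struct mmu_gather *tlb,
 *                                              struct page *page, int page_size)
 *      {
 *              // true means the batches are full: flush now, which frees the
 *              // gathered pages and resets tlb->active back to tlb->local.
 *              if (__tlb_remove_page_size(tlb, page, page_size))
 *                      tlb_flush_mmu(tlb);
 *      }
 */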

#endif /* CONFIG_MMU_GATHER_NO_GATHER */

#ifdef CONFIG_MMU_GATHER_TABLE_FREE

static void __tlb_remove_table_free(struct mmu_table_batch *batch)
{
        int i;

        for (i = 0; i < batch->nr; i++)
                __tlb_remove_table(batch->tables[i]);

        free_page((unsigned long)batch);
}

#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE

/*
 * Semi-RCU freeing of the page directories.
 *
 * This is needed by some architectures to implement software pagetable walkers.
 *
 * gup_fast() and other software pagetable walkers do a lockless page-table
 * walk and therefore need some synchronization with the freeing of the page
 * directories. The chosen means to accomplish that is by disabling IRQs over
 * the walk.
 *
 * Architectures that use IPIs to flush TLBs will then automagically DTRT,
 * since we unlink the page, flush TLBs, then free the page. Because the
 * disabling of IRQs delays the completion of the TLB flush, we can never
 * observe an already freed page.
 *
 * Architectures that do not have this (e.g. PPC) need to delay the freeing by
 * some other means; this is that means.
 *
 * What we do is batch the freed directory pages (tables) and RCU free them.
 * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
 * holds off grace periods.
 *
 * However, in order to batch these pages we need to allocate storage; this
 * allocation is deep inside the MM code and can thus easily fail on memory
 * pressure. To guarantee progress we fall back to single table freeing, see
 * the implementation of tlb_remove_table_one().
 */
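
/*
 * To make the synchronization above concrete, a lockless walker of the
 * gup_fast() family conceptually does the following (an illustrative sketch,
 * not the actual gup implementation; walk_page_tables() is a made-up name):
 *
 *      unsigned long flags;
 *
 *      local_irq_save(flags);          // blocks the IPI broadcast issued by
 *                                      // tlb_remove_table_sync_one() and, with
 *                                      // RCU-sched, holds off the grace period
 *      walk_page_tables(mm, addr);     // may dereference pud/pmd/pte pages
 *      local_irq_restore(flags);       // only now may those pages be freed
 *
 * Either the IPI in tlb_remove_table_sync_one() or the RCU grace period in
 * tlb_remove_table_free() must therefore complete before the directory pages
 * are handed back to the page allocator.
 */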

static void tlb_remove_table_smp_sync(void *arg)
{
        /* Simply deliver the interrupt */
}

static void tlb_remove_table_sync_one(void)
{
        /*
         * This isn't an RCU grace period and hence the page-tables cannot be
         * assumed to be actually RCU-freed.
         *
         * It is however sufficient for software page-table walkers that rely on
         * IRQ disabling.
         */
        smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
}

static void tlb_remove_table_rcu(struct rcu_head *head)
{
        __tlb_remove_table_free(container_of(head, struct mmu_table_batch, rcu));
}

static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
        call_rcu(&batch->rcu, tlb_remove_table_rcu);
}

#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */

static void tlb_remove_table_sync_one(void) { }

static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
        __tlb_remove_table_free(batch);
}

#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */

/*
 * If we want tlb_remove_table() to imply TLB invalidates.
 */
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
        if (tlb_needs_table_invalidate()) {
                /*
                 * Invalidate page-table caches used by hardware walkers. Then
                 * we still need to RCU-sched wait while freeing the pages
                 * because software walkers can still be in-flight.
                 */
                tlb_flush_mmu_tlbonly(tlb);
        }
}

static void tlb_remove_table_one(void *table)
{
        tlb_remove_table_sync_one();
        __tlb_remove_table(table);
}

static void tlb_table_flush(struct mmu_gather *tlb)
{
        struct mmu_table_batch **batch = &tlb->batch;

        if (*batch) {
                tlb_table_invalidate(tlb);
                tlb_remove_table_free(*batch);
                *batch = NULL;
        }
}

void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
        struct mmu_table_batch **batch = &tlb->batch;

        if (*batch == NULL) {
                *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
                if (*batch == NULL) {
                        tlb_table_invalidate(tlb);
                        tlb_remove_table_one(table);
                        return;
                }
                (*batch)->nr = 0;
        }

        (*batch)->tables[(*batch)->nr++] = table;
        if ((*batch)->nr == MAX_TABLE_BATCH)
                tlb_table_flush(tlb);
}
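
/*
 * A hedged sketch of how an architecture typically feeds page-table pages into
 * this path: its __pte_free_tlb()/__pmd_free_tlb() hooks hand the directory
 * page to tlb_remove_table() instead of freeing it immediately (names, types
 * and teardown details vary per architecture; this is illustrative only):
 *
 *      static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 *                                        unsigned long address)
 *      {
 *              // arch-specific constructor/accounting teardown goes here
 *              tlb_remove_table(tlb, pte);     // defer the free until it is safe
 *      }
 *
 * __tlb_remove_table(), also arch-provided, then performs the real free once
 * the IPI or RCU grace period above has completed.
 */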

static inline void tlb_table_init(struct mmu_gather *tlb)
{
        tlb->batch = NULL;
}

#else /* !CONFIG_MMU_GATHER_TABLE_FREE */

static inline void tlb_table_flush(struct mmu_gather *tlb) { }
static inline void tlb_table_init(struct mmu_gather *tlb) { }

#endif /* CONFIG_MMU_GATHER_TABLE_FREE */

static void tlb_flush_mmu_free(struct mmu_gather *tlb)
{
        tlb_table_flush(tlb);
#ifndef CONFIG_MMU_GATHER_NO_GATHER
        tlb_batch_pages_flush(tlb);
#endif
}

void tlb_flush_mmu(struct mmu_gather *tlb)
{
        tlb_flush_mmu_tlbonly(tlb);
        tlb_flush_mmu_free(tlb);
}

static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
                             bool fullmm)
{
        tlb->mm = mm;
        tlb->fullmm = fullmm;

#ifndef CONFIG_MMU_GATHER_NO_GATHER
        tlb->need_flush_all = 0;
        tlb->local.next = NULL;
        tlb->local.nr   = 0;
        tlb->local.max  = ARRAY_SIZE(tlb->__pages);
        tlb->active     = &tlb->local;
        tlb->batch_count = 0;
#endif

        tlb_table_init(tlb);
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
        tlb->page_size = 0;
#endif

        __tlb_reset_range(tlb);
        inc_tlb_flush_pending(tlb->mm);
}

/**
 * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm.
 */
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
{
        __tlb_gather_mmu(tlb, mm, false);
}

/**
 * tlb_gather_mmu_fullmm - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * In this case, @mm is without users and we're going to destroy the
 * full address space (exit/execve).
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm.
 */
void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
{
        __tlb_gather_mmu(tlb, mm, true);
}

/**
 * tlb_finish_mmu - finish an mmu_gather structure
 * @tlb: the mmu_gather structure to finish
 *
 * Called at the end of the shootdown operation to free up any resources that
 * were required.
 */
void tlb_finish_mmu(struct mmu_gather *tlb)
{
        /*
         * If parallel threads are doing PTE changes on the same range under a
         * non-exclusive lock (e.g., mmap_lock read-side) but defer the TLB
         * flush by batching, one thread may end up seeing inconsistent PTEs
         * and be left with stale TLB entries.  So flush the TLB forcefully
         * if we detect parallel PTE batching threads.
         *
         * However, some syscalls, e.g. munmap(), may free page tables; this
         * requires a forced flush of everything in the given range.  Otherwise
         * stale TLB entries may remain on architectures, e.g. aarch64, that
         * can restrict the flush to a specific page-table level.
         */
        if (mm_tlb_flush_nested(tlb->mm)) {
                /*
                 * On aarch64, fullmm yields better performance by avoiding
                 * multiple CPUs spamming TLBI messages at the same time.
                 *
                 * On x86, non-fullmm doesn't make a significant difference
                 * compared to fullmm.
                 */
                tlb->fullmm = 1;
                __tlb_reset_range(tlb);
                tlb->freed_tables = 1;
        }

        tlb_flush_mmu(tlb);

#ifndef CONFIG_MMU_GATHER_NO_GATHER
        tlb_batch_list_free(tlb);
#endif
        dec_tlb_flush_pending(tlb->mm);
}
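
/*
 * Putting it together, a typical unmap path uses an on-stack mmu_gather
 * roughly as sketched below (simplified; see the real callers in mm/memory.c
 * and mm/mmap.c):
 *
 *      struct mmu_gather tlb;
 *
 *      tlb_gather_mmu(&tlb, mm);       // or tlb_gather_mmu_fullmm() on exit
 *      // for each PTE torn down:
 *      //      clear the PTE, then tlb_remove_tlb_entry() + __tlb_remove_page_size()
 *      // for each page-table page freed:
 *      //      pte_free_tlb()/pmd_free_tlb()/... which may call tlb_remove_table()
 *      tlb_finish_mmu(&tlb);           // final TLB flush, free gathered pages
 */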