linux/arch/powerpc/mm/book3s64/mmu_context.c
/*
 *  MMU context allocation for 64-bit kernels.
 *
 *  Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 *
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/pkeys.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/export.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/cpu.h>

#include <asm/mmu_context.h>
#include <asm/pgalloc.h>

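/*
 * A single IDA backs all context id allocation in this file: hash context
 * ids as well as the PIDs used to index the radix process table.
 */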
static DEFINE_IDA(mmu_context_ida);

static int alloc_context_id(int min_id, int max_id)
{
        return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL);
}

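/*
 * Reserve a specific context id. Callers rely on getting exactly the id they
 * asked for, so anything else (including allocation failure) is worth a WARN.
 */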
void hash__reserve_context_id(int id)
{
        int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL);

        WARN(result != id, "mmu: Failed to reserve context id %d (rc %d)\n", id, result);
}

int hash__alloc_context_id(void)
{
        unsigned long max;

        if (mmu_has_feature(MMU_FTR_68_BIT_VA))
                max = MAX_USER_CONTEXT;
        else
                max = MAX_USER_CONTEXT_65BIT_VA;

        return alloc_context_id(MIN_USER_CONTEXT, max);
}
EXPORT_SYMBOL_GPL(hash__alloc_context_id);

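/*
 * With the hash MMU an mm can own several context ids: extended_id[] holds
 * one id per chunk of the user address space, with extended_id[0] aliasing
 * ctx->id. The array is copied as-is on fork, so every slot that holds the
 * parent's id must be given a fresh id for the new mm.
 */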
static int realloc_context_ids(mm_context_t *ctx)
{
        int i, id;

        /*
         * id 0 (aka. ctx->id) is special, we always allocate a new one, even if
         * there wasn't one allocated previously (which happens in the exec
         * case where ctx is newly allocated).
         *
         * We have to be a bit careful here. We must keep the existing ids in
         * the array, so that we can test if they're non-zero to decide if we
         * need to allocate a new one. However in case of error we must free the
         * ids we've allocated but *not* any of the existing ones (or risk a
         * UAF). That's why we decrement i at the start of the error handling
         * loop, to skip the id that we just tested but couldn't reallocate.
         */
        for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
                if (i == 0 || ctx->extended_id[i]) {
                        id = hash__alloc_context_id();
                        if (id < 0)
                                goto error;

                        ctx->extended_id[i] = id;
                }
        }

        /* The caller expects us to return id */
        return ctx->id;

error:
        for (i--; i >= 0; i--) {
                if (ctx->extended_id[i])
                        ida_free(&mmu_context_ida, ctx->extended_id[i]);
        }

        return id;
}

static int hash__init_new_context(struct mm_struct *mm)
{
        int index;

        /*
         * The old code would re-promote on fork, we don't do that when using
         * slices as it could cause problems promoting slices that have been
         * forced down to 4K.
         *
         * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
         * explicitly against context.id == 0. This ensures that we properly
         * initialize context slice details for newly allocated mm's (which will
         * have id == 0) and don't alter context slice inherited via fork (which
         * will have id != 0).
         *
         * We should not be calling init_new_context() on init_mm. Hence a
         * check against 0 is OK.
         */
        if (mm->context.id == 0)
                slice_init_new_context_exec(mm);

        index = realloc_context_ids(&mm->context);
        if (index < 0)
                return index;

        subpage_prot_init_new_context(mm);

        pkey_mm_init(mm);
        return index;
}

static int radix__init_new_context(struct mm_struct *mm)
{
        unsigned long rts_field;
        int index, max_id;

        max_id = (1 << mmu_pid_bits) - 1;
        index = alloc_context_id(mmu_base_pid, max_id);
        if (index < 0)
                return index;

        /*
         * Set up the process table entry: pack the radix tree size (RTS),
         * the physical address of the PGD and the root page directory size
         * into prtb0.
         */
        rts_field = radix__get_tree_size();
        process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);

        /*
         * Order the above store with subsequent update of the PID
         * register (at which point HW can start loading/caching
         * the entry) and the corresponding load by the MMU from
         * the L2 cache.
         */
        asm volatile("ptesync;isync" : : : "memory");

        mm->context.npu_context = NULL;

        return index;
}

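/*
 * Common entry point for both MMU flavours: pick up a hash context id or a
 * radix PID, then initialise the parts of mm_context_t shared by both.
 */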
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
        int index;

        if (radix_enabled())
                index = radix__init_new_context(mm);
        else
                index = hash__init_new_context(mm);

        if (index < 0)
                return index;

        mm->context.id = index;

        mm->context.pte_frag = NULL;
        mm->context.pmd_frag = NULL;
#ifdef CONFIG_SPAPR_TCE_IOMMU
        mm_iommu_init(mm);
#endif
        atomic_set(&mm->context.active_cpus, 0);
        atomic_set(&mm->context.copros, 0);

        return 0;
}

void __destroy_context(int context_id)
{
        ida_free(&mmu_context_ida, context_id);
}
EXPORT_SYMBOL_GPL(__destroy_context);

static void destroy_contexts(mm_context_t *ctx)
{
        int index, context_id;

        for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
                context_id = ctx->extended_id[index];
                if (context_id)
                        ida_free(&mmu_context_ida, context_id);
        }
}

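/*
 * PTE pages are handed out in PTE_FRAG_NR fragments, each of which holds a
 * reference on the backing page via pt_frag_refcount. mm->context.pte_frag
 * points at the next unused fragment, so its offset within the page tells us
 * how many fragments were handed out; drop the references for the fragments
 * that were never used and free the page once no user is left.
 */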
static void pte_frag_destroy(void *pte_frag)
{
        int count;
        struct page *page;

        page = virt_to_page(pte_frag);
        /* drop all the pending references */
        count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
        /* We allow PTE_FRAG_NR fragments from a PTE page */
        if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
                pgtable_page_dtor(page);
                __free_page(page);
        }
}

static void pmd_frag_destroy(void *pmd_frag)
{
        int count;
        struct page *page;

        page = virt_to_page(pmd_frag);
        /* drop all the pending references */
        count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
        /* We allow PMD_FRAG_NR fragments from a PMD page */
        if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
                pgtable_pmd_page_dtor(page);
                __free_page(page);
        }
}

static void destroy_pagetable_page(struct mm_struct *mm)
{
        void *frag;

        frag = mm->context.pte_frag;
        if (frag)
                pte_frag_destroy(frag);

        frag = mm->context.pmd_frag;
        if (frag)
                pmd_frag_destroy(frag);
}

void destroy_context(struct mm_struct *mm)
{
#ifdef CONFIG_SPAPR_TCE_IOMMU
        WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
        /*
         * For tasks which were successfully initialized we end up calling
         * arch_exit_mmap() which clears the process table entry. And
         * arch_exit_mmap() is called before the required fullmm TLB flush
         * which does a RIC=2 flush. Hence for an initialized task, we do clear
         * any cached process table entries.
         *
         * The condition below handles the error case during task init. We have
         * set the process table entry early and if we fail a task
         * initialization, we need to ensure the process table entry is zeroed.
         * We need not worry about process table entry caches because the task
         * never ran with the PID value.
         */
        if (radix_enabled())
                process_tb[mm->context.id].prtb0 = 0;
        else
                subpage_prot_free(mm);
        destroy_pagetable_page(mm);
        destroy_contexts(&mm->context);
        mm->context.id = MMU_NO_CONTEXT;
}

void arch_exit_mmap(struct mm_struct *mm)
{
        if (radix_enabled()) {
                /*
                 * Radix doesn't have a valid bit in the process table
                 * entries. However we know that at least the P9 implementation
                 * will avoid caching an entry with an invalid RTS field,
                 * and 0 is invalid. So this will do.
                 *
                 * This runs before the "fullmm" tlb flush in exit_mmap,
                 * which does a RIC=2 tlbie to clear the process table
                 * entry. See the "fullmm" comments in tlb-radix.c.
                 *
                 * No barrier required here after the store because
                 * this process will do the invalidate, which starts with
                 * ptesync.
                 */
                process_tb[mm->context.id].prtb0 = 0;
        }
}

#ifdef CONFIG_PPC_RADIX_MMU
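/*
 * On radix the hardware walks the process table indexed by the PID register,
 * so switching contexts is just a PID write followed by an isync to make
 * subsequent instructions use the new translations.
 */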
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
        mtspr(SPRN_PID, next->context.id);
        isync();
}
#endif

/**
 * cleanup_cpu_mmu_context - Clean up MMU details for this CPU (newly offlined)
 *
 * This clears the CPU from mm_cpumask for all processes, and then flushes the
 * local TLB to ensure TLB coherency in case the CPU is onlined again.
 *
 * KVM guest translations are not necessarily flushed here. If KVM started
 * using mm_cpumask or the Linux APIs which do, this would have to be resolved.
 */
#ifdef CONFIG_HOTPLUG_CPU
void cleanup_cpu_mmu_context(void)
{
        int cpu = smp_processor_id();

        clear_tasks_mm_cpumask(cpu);
        tlbiel_all();
}
#endif