linux/arch/powerpc/mm/book3s64/mmu_context.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  MMU context allocation for 64-bit kernels.
 *
 *  Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/pkeys.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/export.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/cpu.h>

#include <asm/mmu_context.h>
#include <asm/pgalloc.h>

#include "internal.h"

static DEFINE_IDA(mmu_context_ida);

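/*
 * Allocate a context id from the shared IDA, constrained to the
 * [min_id, max_id] range. Both the hash and radix allocation paths
 * below go through this helper.
 */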
static int alloc_context_id(int min_id, int max_id)
{
        return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL);
}

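/*
 * Reserve a specific context id. Passing min == max == id to
 * ida_alloc_range() either returns exactly that id or fails, so a
 * mismatching result means the id was unavailable, hence the WARN.
 */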
void hash__reserve_context_id(int id)
{
        int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL);

        WARN(result != id, "mmu: Failed to reserve context id %d (rc %d)\n", id, result);
}

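/*
 * Allocate a user context id for the hash MMU. The upper bound depends
 * on whether the MMU supports 68-bit virtual addresses; without
 * MMU_FTR_68_BIT_VA we are limited to the smaller 65-bit VA range.
 */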
int hash__alloc_context_id(void)
{
        unsigned long max;

        if (mmu_has_feature(MMU_FTR_68_BIT_VA))
                max = MAX_USER_CONTEXT;
        else
                max = MAX_USER_CONTEXT_65BIT_VA;

        return alloc_context_id(MIN_USER_CONTEXT, max);
}
EXPORT_SYMBOL_GPL(hash__alloc_context_id);

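/*
 * Re-allocate every context id recorded in ctx->extended_id; slot 0
 * aliases ctx->id. See the comment below for the error handling
 * subtleties.
 */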
static int realloc_context_ids(mm_context_t *ctx)
{
        int i, id;

        /*
         * id 0 (aka. ctx->id) is special, we always allocate a new one, even if
         * there wasn't one allocated previously (which happens in the exec
         * case where ctx is newly allocated).
         *
         * We have to be a bit careful here. We must keep the existing ids in
         * the array, so that we can test if they're non-zero to decide if we
         * need to allocate a new one. However in case of error we must free the
         * ids we've allocated but *not* any of the existing ones (or risk a
         * UAF). That's why we decrement i at the start of the error handling
         * loop, to skip the id that we just tested but couldn't reallocate.
         */
        for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
                if (i == 0 || ctx->extended_id[i]) {
                        id = hash__alloc_context_id();
                        if (id < 0)
                                goto error;

                        ctx->extended_id[i] = id;
                }
        }

        /* The caller expects us to return id */
        return ctx->id;

error:
        for (i--; i >= 0; i--) {
                if (ctx->extended_id[i])
                        ida_free(&mmu_context_ida, ctx->extended_id[i]);
        }

        return id;
}

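/*
 * Hash flavour of init_new_context(): allocate the hash_mm_context,
 * initialise the slice state for a new mm or copy it from current->mm
 * on fork, then allocate the context ids and initialise pkeys.
 */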
static int hash__init_new_context(struct mm_struct *mm)
{
        int index;

        mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
                                           GFP_KERNEL);
        if (!mm->context.hash_context)
                return -ENOMEM;

        /*
         * The old code would re-promote on fork; we don't do that when using
         * slices as it could cause problems promoting slices that have been
         * forced down to 4K.
         *
         * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
         * explicitly against context.id == 0. This ensures that we properly
         * initialize context slice details for newly allocated mm's (which
         * will have id == 0) and don't alter context slice details inherited
         * via fork (which will have id != 0).
         *
         * We should not be calling init_new_context() on init_mm. Hence a
         * check against 0 is OK.
         */
        if (mm->context.id == 0) {
                memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
                slice_init_new_context_exec(mm);
        } else {
                /* This is fork. Copy hash_context details from current->mm */
                memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
#ifdef CONFIG_PPC_SUBPAGE_PROT
                /* inherit subpage prot details if we have one. */
                if (current->mm->context.hash_context->spt) {
                        mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
                                                                GFP_KERNEL);
                        if (!mm->context.hash_context->spt) {
                                kfree(mm->context.hash_context);
                                return -ENOMEM;
                        }
                }
#endif
        }

        index = realloc_context_ids(&mm->context);
        if (index < 0) {
#ifdef CONFIG_PPC_SUBPAGE_PROT
                kfree(mm->context.hash_context->spt);
#endif
                kfree(mm->context.hash_context);
                return index;
        }

        pkey_mm_init(mm);
        return index;
}

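/* Per-exec setup for the hash MMU: refresh the slice and SLB state. */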
void hash__setup_new_exec(void)
{
        slice_setup_new_exec();

        slb_setup_new_exec();
}

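/*
 * Radix flavour of init_new_context(): allocate a PID and install the
 * process table entry pointing at the new mm's PGD. Radix has no use
 * for hash_mm_context, so it is explicitly NULLed.
 */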
static int radix__init_new_context(struct mm_struct *mm)
{
        unsigned long rts_field;
        int index, max_id;

        max_id = (1 << mmu_pid_bits) - 1;
        index = alloc_context_id(mmu_base_pid, max_id);
        if (index < 0)
                return index;

        /*
         * Set the process table entry.
         */
        rts_field = radix__get_tree_size();
        process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);

        /*
         * Order the above store with subsequent update of the PID
         * register (at which point HW can start loading/caching
         * the entry) and the corresponding load by the MMU from
         * the L2 cache.
         */
        asm volatile("ptesync;isync" : : : "memory");

        mm->context.hash_context = NULL;

        return index;
}

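/*
 * Called when a new mm is created: take the radix or hash path to set
 * up a context id, then initialise the remaining MMU context state
 * (page table fragments, IOMMU list, CPU/coprocessor counters).
 */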
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
        int index;

        if (radix_enabled())
                index = radix__init_new_context(mm);
        else
                index = hash__init_new_context(mm);

        if (index < 0)
                return index;

        mm->context.id = index;

        mm->context.pte_frag = NULL;
        mm->context.pmd_frag = NULL;
#ifdef CONFIG_SPAPR_TCE_IOMMU
        mm_iommu_init(mm);
#endif
        atomic_set(&mm->context.active_cpus, 0);
        atomic_set(&mm->context.copros, 0);

        return 0;
}

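/* Return a single context id to the IDA. */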
void __destroy_context(int context_id)
{
        ida_free(&mmu_context_ida, context_id);
}
EXPORT_SYMBOL_GPL(__destroy_context);

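/*
 * Free every context id recorded in ctx->extended_id (slot 0 is
 * ctx->id) along with the hash_mm_context, which is NULL on radix.
 */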
static void destroy_contexts(mm_context_t *ctx)
{
        int index, context_id;

        for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
                context_id = ctx->extended_id[index];
                if (context_id)
                        ida_free(&mmu_context_ida, context_id);
        }
        kfree(ctx->hash_context);
}

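/*
 * Release the references still held for the unused fragments of the
 * cached PMD fragment page, freeing the page if the refcount drops to
 * zero.
 */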
static void pmd_frag_destroy(void *pmd_frag)
{
        int count;
        struct page *page;

        page = virt_to_page(pmd_frag);
        /* drop all the pending references */
        count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
        /* We allow PMD_FRAG_NR fragments from a PMD page */
        if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
                pgtable_pmd_page_dtor(page);
                __free_page(page);
        }
}

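/* Free any cached PTE/PMD fragment pages hanging off this mm's context. */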
static void destroy_pagetable_cache(struct mm_struct *mm)
{
        void *frag;

        frag = mm->context.pte_frag;
        if (frag)
                pte_frag_destroy(frag);

        frag = mm->context.pmd_frag;
        if (frag)
                pmd_frag_destroy(frag);
}

void destroy_context(struct mm_struct *mm)
{
#ifdef CONFIG_SPAPR_TCE_IOMMU
        WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
        /*
         * For tasks which were successfully initialized we end up calling
         * arch_exit_mmap() which clears the process table entry. And
         * arch_exit_mmap() is called before the required fullmm TLB flush
         * which does a RIC=2 flush. Hence for an initialized task, we do clear
         * any cached process table entries.
         *
         * The condition below handles the error case during task init. We have
         * set the process table entry early and if we fail a task
         * initialization, we need to ensure the process table entry is zeroed.
         * We need not worry about process table entry caches because the task
         * never ran with the PID value.
         */
        if (radix_enabled())
                process_tb[mm->context.id].prtb0 = 0;
        else
                subpage_prot_free(mm);
        destroy_contexts(&mm->context);
        mm->context.id = MMU_NO_CONTEXT;
}

void arch_exit_mmap(struct mm_struct *mm)
{
        destroy_pagetable_cache(mm);

        if (radix_enabled()) {
                /*
                 * Radix doesn't have a valid bit in the process table
                 * entries. However we know that at least the P9 implementation
                 * will avoid caching an entry with an invalid RTS field,
                 * and 0 is invalid. So this will do.
                 *
                 * This runs before the "fullmm" tlb flush in exit_mmap,
                 * which does a RIC=2 tlbie to clear the process table
                 * entry. See the "fullmm" comments in tlb-radix.c.
                 *
                 * No barrier required here after the store because
                 * this process will do the invalidate, which starts with
                 * ptesync.
                 */
                process_tb[mm->context.id].prtb0 = 0;
        }
}

#ifdef CONFIG_PPC_RADIX_MMU
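/*
 * Switch the radix translation context: write the next mm's PID into
 * SPRN_PID and isync so subsequent translations use the new context.
 */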
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
        mtspr(SPRN_PID, next->context.id);
        isync();
}
#endif

/**
 * cleanup_cpu_mmu_context - Clean up MMU details for this CPU (newly offlined)
 *
 * This clears the CPU from mm_cpumask for all processes, and then flushes the
 * local TLB to ensure TLB coherency in case the CPU is onlined again.
 *
 * KVM guest translations are not necessarily flushed here. If KVM started
 * using mm_cpumask or the Linux APIs which do, this would have to be resolved.
 */
#ifdef CONFIG_HOTPLUG_CPU
void cleanup_cpu_mmu_context(void)
{
        int cpu = smp_processor_id();

        clear_tasks_mm_cpumask(cpu);
        tlbiel_all();
}
#endif