linux/drivers/misc/cxl/fault.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2014 IBM Corp.
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/pid.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>

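/*
 * Override the module parameter prefix so that any parameters defined in
 * this file are published as "cxl.<param>" rather than under the usual
 * module name.
 */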
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "cxl" "."
#include <asm/current.h>
#include <asm/copro.h>
#include <asm/mmu.h>

#include "cxl.h"
#include "trace.h"

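/*
 * Compare a segment table entry against a calculated SLB. The segment
 * table is stored big-endian (hence the cpu_to_be64() conversions).
 */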
static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb)
{
        return ((sste->vsid_data == cpu_to_be64(slb->vsid)) &&
                (sste->esid_data == cpu_to_be64(slb->esid)));
}

/*
 * This finds a free SSTE for the given SLB, or returns NULL if it's already in
 * the segment table.
 */
static struct cxl_sste *find_free_sste(struct cxl_context *ctx,
                                       struct copro_slb *slb)
{
        struct cxl_sste *primary, *sste, *ret = NULL;
        unsigned int mask = (ctx->sst_size >> 7) - 1; /* SSTP0[SegTableSize] */
        unsigned int entry;
        unsigned int hash;

        if (slb->vsid & SLB_VSID_B_1T)
                hash = (slb->esid >> SID_SHIFT_1T) & mask;
        else /* 256M */
                hash = (slb->esid >> SID_SHIFT) & mask;

        primary = ctx->sstp + (hash << 3); /* 8 entries per hash group */

        for (entry = 0, sste = primary; entry < 8; entry++, sste++) {
                if (!ret && !(be64_to_cpu(sste->esid_data) & SLB_ESID_V))
                        ret = sste;
                if (sste_matches(sste, slb))
                        return NULL;
        }
        if (ret)
                return ret;

        /* Nothing free, select an entry to cast out round-robin */
        ret = primary + ctx->sst_lru;
        ctx->sst_lru = (ctx->sst_lru + 1) & 0x7;

        return ret;
}

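/*
 * Write a calculated SLB into the context's segment table, under
 * sste_lock. The VSID half is written before the ESID half (which
 * carries the valid bit), presumably so the PSL never sees a valid
 * entry paired with a stale VSID.
 */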
static void cxl_load_segment(struct cxl_context *ctx, struct copro_slb *slb)
{
        struct cxl_sste *sste;
        unsigned long flags;

        spin_lock_irqsave(&ctx->sste_lock, flags);
        sste = find_free_sste(ctx, slb);
        if (!sste)
                goto out_unlock;

        pr_devel("CXL Populating SST[%li]: %#llx %#llx\n",
                        sste - ctx->sstp, slb->vsid, slb->esid);
        trace_cxl_ste_write(ctx, sste - ctx->sstp, slb->esid, slb->vsid);

        sste->vsid_data = cpu_to_be64(slb->vsid);
        sste->esid_data = cpu_to_be64(slb->esid);
out_unlock:
        spin_unlock_irqrestore(&ctx->sste_lock, flags);
}

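/*
 * Calculate the SLB for an effective address and, on success, load it
 * into the segment table. Returns the copro_calculate_slb() error code.
 */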
static int cxl_fault_segment(struct cxl_context *ctx, struct mm_struct *mm,
                             u64 ea)
{
        struct copro_slb slb = {0, 0};
        int rc;

        rc = copro_calculate_slb(mm, ea, &slb);
        if (!rc)
                cxl_load_segment(ctx, &slb);

        return rc;
}

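/*
 * Acknowledge the fault as an address error and record it on the
 * context, so anyone sleeping on ctx->wq (e.g. a userspace waiter) can
 * observe it.
 */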
static void cxl_ack_ae(struct cxl_context *ctx)
{
        unsigned long flags;

        cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_AE, 0);

        spin_lock_irqsave(&ctx->lock, flags);
        ctx->pending_fault = true;
        ctx->fault_addr = ctx->dar;
        ctx->fault_dsisr = ctx->dsisr;
        spin_unlock_irqrestore(&ctx->lock, flags);

        wake_up_all(&ctx->wq);
}

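/*
 * Handle a segment miss: try to fault the segment in, then ack with
 * restart (TFC_An_R) on success or an address error if the SLB could
 * not be calculated.
 */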
static int cxl_handle_segment_miss(struct cxl_context *ctx,
                                   struct mm_struct *mm, u64 ea)
{
        int rc;

        pr_devel("CXL interrupt: Segment fault pe: %i ea: %#llx\n", ctx->pe, ea);
        trace_cxl_ste_miss(ctx, ea);

        rc = cxl_fault_segment(ctx, mm, ea);
        if (rc) {
                cxl_ack_ae(ctx);
        } else {
                mb(); /* Order seg table write to TFC MMIO write */
                cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
        }

        return IRQ_HANDLED;
}

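/*
 * Resolve a translation fault on behalf of the accelerator against the
 * given mm (or the kernel page tables when mm is NULL). On hash MMUs we
 * also insert the HPTE ourselves via hash_page_mm(), since no CPU trap
 * (0x300/0x400) occurred and update_mmu_cache() will not have done it.
 * Returns 0 on success, or the copro_handle_mm_fault() error code.
 */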
int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar)
{
        vm_fault_t flt = 0;
        int result;
        unsigned long access, flags, inv_flags = 0;

        /*
         * Add the fault handling cpu to task mm cpumask so that we
         * can do a safe lockless page table walk when inserting the
         * hash page table entry. This function gets called with a
         * valid mm for user space addresses, hence the if (mm) check
         * is sufficient here.
         */
        if (mm && !cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
                cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
                /*
                 * We need to make sure we walk the table only after
                 * we update the cpumask. The other side of the barrier
                 * is explained in serialize_against_pte_lookup().
                 */
                smp_mb();
        }

        result = copro_handle_mm_fault(mm, dar, dsisr, &flt);
        if (result) {
                pr_devel("copro_handle_mm_fault failed: %#x\n", result);
                return result;
        }

        if (!radix_enabled()) {
                /*
                 * update_mmu_cache() will not have loaded the hash
                 * since current->trap is not a 0x400 or 0x300, so just
                 * call hash_page_mm() here.
                 */
                access = _PAGE_PRESENT | _PAGE_READ;
                if (dsisr & CXL_PSL_DSISR_An_S)
                        access |= _PAGE_WRITE;

                if (!mm && (get_region_id(dar) != USER_REGION_ID))
                        access |= _PAGE_PRIVILEGED;

                if (dsisr & DSISR_NOHPTE)
                        inv_flags |= HPTE_NOHPTE_UPDATE;

                local_irq_save(flags);
                hash_page_mm(mm, dar, access, 0x300, inv_flags);
                local_irq_restore(flags);
        }
        return 0;
}

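/*
 * Handle a data storage fault: resolve it against the mm, then ack with
 * restart on success or an address error on failure.
 */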
static void cxl_handle_page_fault(struct cxl_context *ctx,
                                  struct mm_struct *mm,
                                  u64 dsisr, u64 dar)
{
        trace_cxl_pte_miss(ctx, dsisr, dar);

        if (cxl_handle_mm_fault(mm, dsisr, dar)) {
                cxl_ack_ae(ctx);
        } else {
                pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe);
                cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
        }
}

/*
 * Returns the mm_struct corresponding to the context ctx, or NULL if
 * mm_users has already dropped to 0, in which case the context may be
 * in the process of being closed.
 */
static struct mm_struct *get_mem_context(struct cxl_context *ctx)
{
        if (ctx->mm == NULL)
                return NULL;

        if (!mmget_not_zero(ctx->mm))
                return NULL;

        return ctx->mm;
}

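/*
 * On POWER8 the PSL distinguishes segment misses (DSISR[DS]) from page
 * faults (DSISR[DM]); on POWER9 every fault that reaches the bottom
 * half is treated as a page fault.
 */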
static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
{
        return cxl_is_power8() && (dsisr & CXL_PSL_DSISR_An_DS);
}

static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr)
{
        if (cxl_is_power8() && (dsisr & CXL_PSL_DSISR_An_DM))
                return true;

        if (cxl_is_power9())
                return true;

        return false;
}

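/*
 * Bottom half of the PSL translation fault interrupt, run from
 * ctx->fault_work. Takes a reference on the mm for user contexts and
 * dispatches to the segment miss or page fault handler.
 */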
void cxl_handle_fault(struct work_struct *fault_work)
{
        struct cxl_context *ctx =
                container_of(fault_work, struct cxl_context, fault_work);
        u64 dsisr = ctx->dsisr;
        u64 dar = ctx->dar;
        struct mm_struct *mm = NULL;

        if (cpu_has_feature(CPU_FTR_HVMODE)) {
                if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr ||
                    cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar ||
                    cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) {
                        /*
                         * Most likely explanation is harmless - a dedicated
                         * process has detached and these were cleared by the
                         * PSL purge, but warn about it just in case
                         */
                        dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n");
                        return;
                }
        }

        /* Early return if the context is being / has been detached */
        if (ctx->status == CLOSED) {
                cxl_ack_ae(ctx);
                return;
        }

        pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. "
                "DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar);

        if (!ctx->kernel) {
                mm = get_mem_context(ctx);
                if (mm == NULL) {
                        pr_devel("%s: unable to get mm for pe=%d pid=%i\n",
                                 __func__, ctx->pe, pid_nr(ctx->pid));
                        cxl_ack_ae(ctx);
                        return;
                }
                pr_devel("Handling page fault for pe=%d pid=%i\n",
                         ctx->pe, pid_nr(ctx->pid));
        }

        if (cxl_is_segment_miss(ctx, dsisr))
                cxl_handle_segment_miss(ctx, mm, dar);
        else if (cxl_is_page_fault(ctx, dsisr))
                cxl_handle_page_fault(ctx, mm, dsisr, dar);
        else
                WARN(1, "cxl_handle_fault has nothing to handle\n");

        if (mm)
                mmput(mm);
}

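/*
 * Pre-load the segment table entry covering a single effective address
 * (used for the WED in CXL_PREFAULT_WED mode).
 */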
static void cxl_prefault_one(struct cxl_context *ctx, u64 ea)
{
        struct mm_struct *mm;

        mm = get_mem_context(ctx);
        if (mm == NULL) {
                pr_devel("cxl_prefault_one unable to get mm %i\n",
                         pid_nr(ctx->pid));
                return;
        }

        cxl_fault_segment(ctx, mm, ea);

        mmput(mm);
}

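/*
 * Round an effective address up to the start of the next segment:
 * segments are 1T when SLB_VSID_B_1T is set, 256M otherwise. For
 * example, next_segment(0x1234, vsid_for_256M) == 0x10000000.
 */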
static u64 next_segment(u64 ea, u64 vsid)
{
        if (vsid & SLB_VSID_B_1T)
                ea |= (1ULL << 40) - 1; /* 1T segment */
        else
                ea |= (1ULL << 28) - 1; /* 256M segment */

        return ea + 1;
}

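/*
 * Walk every VMA in the context's mm and pre-load one segment table
 * entry per segment, skipping a segment whose ESID matches the one
 * just loaded.
 */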
static void cxl_prefault_vma(struct cxl_context *ctx)
{
        u64 ea, last_esid = 0;
        struct copro_slb slb = {0, 0};
        struct vm_area_struct *vma;
        int rc;
        struct mm_struct *mm;

        mm = get_mem_context(ctx);
        if (mm == NULL) {
                pr_devel("cxl_prefault_vma unable to get mm %i\n",
                         pid_nr(ctx->pid));
                return;
        }

        mmap_read_lock(mm);
        for (vma = mm->mmap; vma; vma = vma->vm_next) {
                for (ea = vma->vm_start; ea < vma->vm_end;
                                ea = next_segment(ea, slb.vsid)) {
                        rc = copro_calculate_slb(mm, ea, &slb);
                        if (rc)
                                continue;

                        if (last_esid == slb.esid)
                                continue;

                        cxl_load_segment(ctx, &slb);
                        last_esid = slb.esid;
                }
        }
        mmap_read_unlock(mm);

        mmput(mm);
}

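/*
 * Optionally pre-load the segment table, depending on the AFU's
 * prefault_mode: just the segment containing the WED, every segment
 * mapped in the mm, or (by default) nothing.
 */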
void cxl_prefault(struct cxl_context *ctx, u64 wed)
{
        switch (ctx->afu->prefault_mode) {
        case CXL_PREFAULT_WED:
                cxl_prefault_one(ctx, wed);
                break;
        case CXL_PREFAULT_ALL:
                cxl_prefault_vma(ctx);
                break;
        default:
                break;
        }
}