linux/drivers/misc/cxl/fault.c
/*
 * Copyright 2014 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/pid.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>

#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "cxl" "."
#include <asm/current.h>
#include <asm/copro.h>
#include <asm/mmu.h>

#include "cxl.h"
#include "trace.h"

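/* Check whether a segment table entry already holds the SLB's VSID/ESID pair */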
static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb)
{
        return ((sste->vsid_data == cpu_to_be64(slb->vsid)) &&
                (sste->esid_data == cpu_to_be64(slb->esid)));
}

/*
 * This finds a free SSTE for the given SLB, or returns NULL if it's already in
 * the segment table.
 */
static struct cxl_sste *find_free_sste(struct cxl_context *ctx,
                                       struct copro_slb *slb)
{
        struct cxl_sste *primary, *sste, *ret = NULL;
        unsigned int mask = (ctx->sst_size >> 7) - 1; /* SSTP0[SegTableSize] */
        unsigned int entry;
        unsigned int hash;

        if (slb->vsid & SLB_VSID_B_1T)
                hash = (slb->esid >> SID_SHIFT_1T) & mask;
        else /* 256M */
                hash = (slb->esid >> SID_SHIFT) & mask;

        primary = ctx->sstp + (hash << 3);

        for (entry = 0, sste = primary; entry < 8; entry++, sste++) {
                if (!ret && !(be64_to_cpu(sste->esid_data) & SLB_ESID_V))
                        ret = sste;
                if (sste_matches(sste, slb))
                        return NULL;
        }
        if (ret)
                return ret;

        /* Nothing free, select an entry to cast out */
        ret = primary + ctx->sst_lru;
        ctx->sst_lru = (ctx->sst_lru + 1) & 0x7;

        return ret;
}

static void cxl_load_segment(struct cxl_context *ctx, struct copro_slb *slb)
{
        /* mask is the group index, we search primary and secondary here. */
        struct cxl_sste *sste;
        unsigned long flags;

        spin_lock_irqsave(&ctx->sste_lock, flags);
        sste = find_free_sste(ctx, slb);
        if (!sste)
                goto out_unlock;

        pr_devel("CXL Populating SST[%li]: %#llx %#llx\n",
                        sste - ctx->sstp, slb->vsid, slb->esid);
        trace_cxl_ste_write(ctx, sste - ctx->sstp, slb->esid, slb->vsid);

        sste->vsid_data = cpu_to_be64(slb->vsid);
        sste->esid_data = cpu_to_be64(slb->esid);
out_unlock:
        spin_unlock_irqrestore(&ctx->sste_lock, flags);
}

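/*
 * Resolve the SLB for an effective address and, if that succeeds, load the
 * resulting segment into this context's segment table.
 */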
static int cxl_fault_segment(struct cxl_context *ctx, struct mm_struct *mm,
                             u64 ea)
{
        struct copro_slb slb = {0, 0};
        int rc;

        if (!(rc = copro_calculate_slb(mm, ea, &slb))) {
                cxl_load_segment(ctx, &slb);
        }

        return rc;
}

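/*
 * Acknowledge the interrupt with an Address Error, latch the faulting
 * DAR/DSISR into the context's fault fields, and wake up any waiters.
 */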
static void cxl_ack_ae(struct cxl_context *ctx)
{
        unsigned long flags;

        cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_AE, 0);

        spin_lock_irqsave(&ctx->lock, flags);
        ctx->pending_fault = true;
        ctx->fault_addr = ctx->dar;
        ctx->fault_dsisr = ctx->dsisr;
        spin_unlock_irqrestore(&ctx->lock, flags);

        wake_up_all(&ctx->wq);
}

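/*
 * Handle a segment-table miss: populate the segment table entry and restart
 * the translation, or ack with an Address Error if the segment cannot be
 * resolved.
 */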
static int cxl_handle_segment_miss(struct cxl_context *ctx,
                                   struct mm_struct *mm, u64 ea)
{
        int rc;

        pr_devel("CXL interrupt: Segment fault pe: %i ea: %#llx\n", ctx->pe, ea);
        trace_cxl_ste_miss(ctx, ea);

        if ((rc = cxl_fault_segment(ctx, mm, ea)))
                cxl_ack_ae(ctx);
        else {
                mb(); /* Order seg table write to TFC MMIO write */
                cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
        }

        return IRQ_HANDLED;
}

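/*
 * Fault in the page backing an effective address on behalf of the AFU.
 * For hash MMUs we must also insert the hash page table entry ourselves,
 * since update_mmu_cache() will not have done it for a coprocessor fault.
 */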
int cxl_handle_mm_fault(struct mm_struct *mm, u64 dsisr, u64 dar)
{
        unsigned flt = 0;
        int result;
        unsigned long access, flags, inv_flags = 0;

        /*
         * Add the fault handling cpu to task mm cpumask so that we
         * can do a safe lockless page table walk when inserting the
         * hash page table entry. This function gets called with a
         * valid mm for user space addresses. Hence the if (mm)
         * check is sufficient here.
         */
        if (mm && !cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) {
                cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
                /*
                 * We need to make sure we walk the table only after
                 * we update the cpumask. The other side of the barrier
                 * is explained in serialize_against_pte_lookup()
                 */
                smp_mb();
        }
        if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) {
                pr_devel("copro_handle_mm_fault failed: %#x\n", result);
                return result;
        }

        if (!radix_enabled()) {
                /*
                 * update_mmu_cache() will not have loaded the hash since
                 * current->trap is not a 0x400 or 0x300, so just call
                 * hash_page_mm() here.
                 */
                access = _PAGE_PRESENT | _PAGE_READ;
                if (dsisr & CXL_PSL_DSISR_An_S)
                        access |= _PAGE_WRITE;

                if (!mm && (REGION_ID(dar) != USER_REGION_ID))
                        access |= _PAGE_PRIVILEGED;

                if (dsisr & DSISR_NOHPTE)
                        inv_flags |= HPTE_NOHPTE_UPDATE;

                local_irq_save(flags);
                hash_page_mm(mm, dar, access, 0x300, inv_flags);
                local_irq_restore(flags);
        }
        return 0;
}

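/*
 * Handle a page fault: fault the page in via cxl_handle_mm_fault() and
 * restart the translation, or ack with an Address Error on failure.
 */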
static void cxl_handle_page_fault(struct cxl_context *ctx,
                                  struct mm_struct *mm,
                                  u64 dsisr, u64 dar)
{
        trace_cxl_pte_miss(ctx, dsisr, dar);

        if (cxl_handle_mm_fault(mm, dsisr, dar)) {
                cxl_ack_ae(ctx);
        } else {
                pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe);
                cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
        }
}

/*
 * Returns the mm_struct corresponding to the context ctx.
 * If mm_users == 0, the context may be in the process of being closed,
 * and NULL is returned.
 */
static struct mm_struct *get_mem_context(struct cxl_context *ctx)
{
        if (ctx->mm == NULL)
                return NULL;

        if (!atomic_inc_not_zero(&ctx->mm->mm_users))
                return NULL;

        return ctx->mm;
}

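/*
 * Segment misses are only reported on POWER8, where the PSL flags them
 * via CXL_PSL_DSISR_An_DS.
 */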
static bool cxl_is_segment_miss(struct cxl_context *ctx, u64 dsisr)
{
        if (cxl_is_power8() && (dsisr & CXL_PSL_DSISR_An_DS))
                return true;

        return false;
}

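/*
 * On POWER8 a page fault is flagged by CXL_PSL_DSISR_An_DM; on POWER9
 * every translation fault that reaches here is treated as a page fault.
 */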
static bool cxl_is_page_fault(struct cxl_context *ctx, u64 dsisr)
{
        if (cxl_is_power8() && (dsisr & CXL_PSL_DSISR_An_DM))
                return true;

        if (cxl_is_power9())
                return true;

        return false;
}

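/*
 * Bottom half of the PSL translation fault interrupt, run from the context's
 * fault_work work item: grab the mm for user contexts and dispatch to the
 * segment miss or page fault handlers above.
 */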
void cxl_handle_fault(struct work_struct *fault_work)
{
        struct cxl_context *ctx =
                container_of(fault_work, struct cxl_context, fault_work);
        u64 dsisr = ctx->dsisr;
        u64 dar = ctx->dar;
        struct mm_struct *mm = NULL;

        if (cpu_has_feature(CPU_FTR_HVMODE)) {
                if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr ||
                    cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar ||
                    cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) {
                        /*
                         * Most likely explanation is harmless - a dedicated
                         * process has detached and these were cleared by the
                         * PSL purge, but warn about it just in case
                         */
                        dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n");
                        return;
                }
        }

        /* Early return if the context is being / has been detached */
        if (ctx->status == CLOSED) {
                cxl_ack_ae(ctx);
                return;
        }

        pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. "
                "DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar);

        if (!ctx->kernel) {
                mm = get_mem_context(ctx);
                if (mm == NULL) {
                        pr_devel("%s: unable to get mm for pe=%d pid=%i\n",
                                 __func__, ctx->pe, pid_nr(ctx->pid));
                        cxl_ack_ae(ctx);
                        return;
                } else {
                        pr_devel("Handling page fault for pe=%d pid=%i\n",
                                 ctx->pe, pid_nr(ctx->pid));
                }
        }

        if (cxl_is_segment_miss(ctx, dsisr))
                cxl_handle_segment_miss(ctx, mm, dar);
        else if (cxl_is_page_fault(ctx, dsisr))
                cxl_handle_page_fault(ctx, mm, dsisr, dar);
        else
                WARN(1, "cxl_handle_fault has nothing to handle\n");

        if (mm)
                mmput(mm);
}

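/*
 * Prefault the single segment containing ea; for CXL_PREFAULT_WED this is
 * the work element descriptor passed to cxl_prefault().
 */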
static void cxl_prefault_one(struct cxl_context *ctx, u64 ea)
{
        struct mm_struct *mm;

        mm = get_mem_context(ctx);
        if (mm == NULL) {
                pr_devel("cxl_prefault_one unable to get mm %i\n",
                         pid_nr(ctx->pid));
                return;
        }

        cxl_fault_segment(ctx, mm, ea);

        mmput(mm);
}

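/* Return the first effective address of the segment (256M or 1T) after ea */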
static u64 next_segment(u64 ea, u64 vsid)
{
        if (vsid & SLB_VSID_B_1T)
                ea |= (1ULL << 40) - 1;
        else
                ea |= (1ULL << 28) - 1;

        return ea + 1;
}

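/*
 * Prefault every segment mapped by the process: walk the VMAs and load a
 * segment table entry for each segment they cover (CXL_PREFAULT_ALL).
 */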
static void cxl_prefault_vma(struct cxl_context *ctx)
{
        u64 ea, last_esid = 0;
        struct copro_slb slb;
        struct vm_area_struct *vma;
        int rc;
        struct mm_struct *mm;

        mm = get_mem_context(ctx);
        if (mm == NULL) {
                pr_devel("cxl_prefault_vma unable to get mm %i\n",
                         pid_nr(ctx->pid));
                return;
        }

        down_read(&mm->mmap_sem);
        for (vma = mm->mmap; vma; vma = vma->vm_next) {
                for (ea = vma->vm_start; ea < vma->vm_end;
                                ea = next_segment(ea, slb.vsid)) {
                        rc = copro_calculate_slb(mm, ea, &slb);
                        if (rc)
                                continue;

                        if (last_esid == slb.esid)
                                continue;

                        cxl_load_segment(ctx, &slb);
                        last_esid = slb.esid;
                }
        }
        up_read(&mm->mmap_sem);

        mmput(mm);
}

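/*
 * Optionally warm the context's segment table according to the AFU's
 * prefault_mode: just the segment for the WED, or every segment the
 * process has mapped.
 */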
void cxl_prefault(struct cxl_context *ctx, u64 wed)
{
        switch (ctx->afu->prefault_mode) {
        case CXL_PREFAULT_WED:
                cxl_prefault_one(ctx, wed);
                break;
        case CXL_PREFAULT_ALL:
                cxl_prefault_vma(ctx);
                break;
        default:
                break;
        }
}