linux/drivers/misc/cxl/fault.c
/*
 * Copyright 2014 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/workqueue.h>
#include <linux/sched.h>
#include <linux/pid.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>

#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "cxl" "."
#include <asm/current.h>
#include <asm/copro.h>
#include <asm/mmu.h>

#include "cxl.h"
#include "trace.h"

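/* Check whether this segment table entry already maps the given SLB entry */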
static bool sste_matches(struct cxl_sste *sste, struct copro_slb *slb)
{
        return ((sste->vsid_data == cpu_to_be64(slb->vsid)) &&
                (sste->esid_data == cpu_to_be64(slb->esid)));
}

/*
 * This finds a free SSTE for the given SLB, or returns NULL if it's already in
 * the segment table.
 */
static struct cxl_sste *find_free_sste(struct cxl_context *ctx,
                                       struct copro_slb *slb)
{
        struct cxl_sste *primary, *sste, *ret = NULL;
        unsigned int mask = (ctx->sst_size >> 7) - 1; /* SSTP0[SegTableSize] */
        unsigned int entry;
        unsigned int hash;

        if (slb->vsid & SLB_VSID_B_1T)
                hash = (slb->esid >> SID_SHIFT_1T) & mask;
        else /* 256M */
                hash = (slb->esid >> SID_SHIFT) & mask;

        primary = ctx->sstp + (hash << 3);

        for (entry = 0, sste = primary; entry < 8; entry++, sste++) {
                if (!ret && !(be64_to_cpu(sste->esid_data) & SLB_ESID_V))
                        ret = sste;
                if (sste_matches(sste, slb))
                        return NULL;
        }
        if (ret)
                return ret;

        /* Nothing free, select an entry to cast out */
        ret = primary + ctx->sst_lru;
        ctx->sst_lru = (ctx->sst_lru + 1) & 0x7;

        return ret;
}

static void cxl_load_segment(struct cxl_context *ctx, struct copro_slb *slb)
{
        /* Find a slot in this context's segment table and populate it */
        struct cxl_sste *sste;
        unsigned long flags;

        spin_lock_irqsave(&ctx->sste_lock, flags);
        sste = find_free_sste(ctx, slb);
        if (!sste)
                goto out_unlock;

        pr_devel("CXL Populating SST[%li]: %#llx %#llx\n",
                        sste - ctx->sstp, slb->vsid, slb->esid);
        trace_cxl_ste_write(ctx, sste - ctx->sstp, slb->esid, slb->vsid);

        sste->vsid_data = cpu_to_be64(slb->vsid);
        sste->esid_data = cpu_to_be64(slb->esid);
out_unlock:
        spin_unlock_irqrestore(&ctx->sste_lock, flags);
}

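/*
 * Calculate the SLB entry covering the effective address ea in mm and, on
 * success, install it in the context's segment table. Returns 0 on success
 * or the error from copro_calculate_slb().
 */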
static int cxl_fault_segment(struct cxl_context *ctx, struct mm_struct *mm,
                             u64 ea)
{
        struct copro_slb slb = {0, 0};
        int rc;

        rc = copro_calculate_slb(mm, ea, &slb);
        if (!rc)
                cxl_load_segment(ctx, &slb);

        return rc;
}

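/*
 * Acknowledge the interrupt with an address error, record the faulting
 * address and DSISR in the context and wake up anyone waiting on it.
 */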
static void cxl_ack_ae(struct cxl_context *ctx)
{
        unsigned long flags;

        cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_AE, 0);

        spin_lock_irqsave(&ctx->lock, flags);
        ctx->pending_fault = true;
        ctx->fault_addr = ctx->dar;
        ctx->fault_dsisr = ctx->dsisr;
        spin_unlock_irqrestore(&ctx->lock, flags);

        wake_up_all(&ctx->wq);
}

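/*
 * Handle a segment miss: try to install a segment table entry for ea. On
 * failure reply to the PSL with an address error, otherwise restart the
 * translation once the segment table update is visible.
 */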
static int cxl_handle_segment_miss(struct cxl_context *ctx,
                                   struct mm_struct *mm, u64 ea)
{
        int rc;

        pr_devel("CXL interrupt: Segment fault pe: %i ea: %#llx\n", ctx->pe, ea);
        trace_cxl_ste_miss(ctx, ea);

        rc = cxl_fault_segment(ctx, mm, ea);
        if (rc) {
                cxl_ack_ae(ctx);
        } else {
                mb(); /* Order seg table write to TFC MMIO write */
                cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
        }

        return IRQ_HANDLED;
}

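/*
 * Handle a page fault: fault the page in via copro_handle_mm_fault(), then
 * preload the hash page table entry with hash_page_mm() and restart the
 * translation. On any failure reply to the PSL with an address error.
 */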
static void cxl_handle_page_fault(struct cxl_context *ctx,
                                  struct mm_struct *mm, u64 dsisr, u64 dar)
{
        unsigned int flt = 0;
        int result;
        unsigned long access, flags, inv_flags = 0;

        trace_cxl_pte_miss(ctx, dsisr, dar);

        result = copro_handle_mm_fault(mm, dar, dsisr, &flt);
        if (result) {
                pr_devel("copro_handle_mm_fault failed: %#x\n", result);
                cxl_ack_ae(ctx);
                return;
        }

        /*
         * update_mmu_cache() will not have loaded the hash since current->trap
         * is not a 0x400 or 0x300, so just call hash_page_mm() here.
         */
        access = _PAGE_PRESENT;
        if (dsisr & CXL_PSL_DSISR_An_S)
                access |= _PAGE_RW;
        /* Grant user access for user contexts and for user-space addresses */
        if ((!ctx->kernel) || !(dar & (1ULL << 63)))
                access |= _PAGE_USER;

        if (dsisr & DSISR_NOHPTE)
                inv_flags |= HPTE_NOHPTE_UPDATE;

        local_irq_save(flags);
        hash_page_mm(mm, dar, access, 0x300, inv_flags);
        local_irq_restore(flags);

        pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe);
        cxl_ops->ack_irq(ctx, CXL_PSL_TFC_An_R, 0);
}

/*
 * Returns the mm_struct corresponding to the context ctx via ctx->pid.
 * If that task has exited, use the task group leader accessible via
 * ctx->glpid to find the next task in the thread group that has a valid
 * mm_struct associated with it. If such a task is found, ctx->pid is
 * updated to its pid so that subsequent translations use it. If no task
 * in the group has a valid mm_struct, NULL is returned.
 */
static struct mm_struct *get_mem_context(struct cxl_context *ctx)
{
        struct task_struct *task = NULL;
        struct mm_struct *mm = NULL;
        struct pid *old_pid = ctx->pid;

        if (old_pid == NULL) {
                pr_warn("%s: Invalid context for pe=%d\n",
                        __func__, ctx->pe);
                return NULL;
        }

        task = get_pid_task(old_pid, PIDTYPE_PID);

        /*
         * pid_alive() may look racy, but it saves us a costly get_task_mm()
         * when the task is a zombie. In the worst case we may think a task
         * that is about to die is still alive, but get_task_mm() will then
         * simply return NULL.
         */
        if (task != NULL && pid_alive(task))
                mm = get_task_mm(task);

        /* release the task struct that was taken earlier */
        if (task)
                put_task_struct(task);
        else
                pr_devel("%s: Context owning pid=%i for pe=%i dead\n",
                        __func__, pid_nr(old_pid), ctx->pe);

        /*
         * If we couldn't find the mm context, use the group leader to
         * iterate over the task group and find a task with a valid
         * mm_struct.
         */
        if (unlikely(mm == NULL && ctx->glpid != NULL)) {
                rcu_read_lock();
                task = pid_task(ctx->glpid, PIDTYPE_PID);
                if (task)
                        do {
                                mm = get_task_mm(task);
                                if (mm) {
                                        ctx->pid = get_task_pid(task,
                                                                PIDTYPE_PID);
                                        break;
                                }
                                task = next_thread(task);
                        } while (task && !thread_group_leader(task));
                rcu_read_unlock();

                /* check if we switched pid */
                if (ctx->pid != old_pid) {
                        if (mm)
                                pr_devel("%s:pe=%i switch pid %i->%i\n",
                                         __func__, ctx->pe, pid_nr(old_pid),
                                         pid_nr(ctx->pid));
                        else
                                pr_devel("%s:Cannot find mm for pid=%i\n",
                                         __func__, pid_nr(old_pid));

                        /* drop the reference to older pid */
                        put_pid(old_pid);
                }
        }

        return mm;
}

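/*
 * Bottom half of the translation fault interrupt, run from the context's
 * fault_work work queue item: re-check the fault registers where they are
 * accessible (CPU_FTR_HVMODE), grab the mm for user contexts and dispatch
 * to the segment or page fault handler based on the DSISR.
 */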
void cxl_handle_fault(struct work_struct *fault_work)
{
        struct cxl_context *ctx =
                container_of(fault_work, struct cxl_context, fault_work);
        u64 dsisr = ctx->dsisr;
        u64 dar = ctx->dar;
        struct mm_struct *mm = NULL;

        if (cpu_has_feature(CPU_FTR_HVMODE)) {
                if (cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An) != dsisr ||
                    cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An) != dar ||
                    cxl_p2n_read(ctx->afu, CXL_PSL_PEHandle_An) != ctx->pe) {
                        /*
                         * Most likely explanation is harmless - a dedicated
                         * process has detached and these were cleared by the
                         * PSL purge, but warn about it just in case
                         */
                        dev_notice(&ctx->afu->dev, "cxl_handle_fault: Translation fault regs changed\n");
                        return;
                }
        }

        /* Early return if the context is being / has been detached */
        if (ctx->status == CLOSED) {
                cxl_ack_ae(ctx);
                return;
        }

        pr_devel("CXL BOTTOM HALF handling fault for afu pe: %i. "
                "DSISR: %#llx DAR: %#llx\n", ctx->pe, dsisr, dar);

        if (!ctx->kernel) {
                mm = get_mem_context(ctx);
                /* indicates all the threads in the task group have exited */
                if (mm == NULL) {
                        pr_devel("%s: unable to get mm for pe=%d pid=%i\n",
                                 __func__, ctx->pe, pid_nr(ctx->pid));
                        cxl_ack_ae(ctx);
                        return;
                } else {
                        pr_devel("Handling page fault for pe=%d pid=%i\n",
                                 ctx->pe, pid_nr(ctx->pid));
                }
        }

        if (dsisr & CXL_PSL_DSISR_An_DS)
                cxl_handle_segment_miss(ctx, mm, dar);
        else if (dsisr & CXL_PSL_DSISR_An_DM)
                cxl_handle_page_fault(ctx, mm, dsisr, dar);
        else
                WARN(1, "cxl_handle_fault has nothing to handle\n");

        if (mm)
                mmput(mm);
}

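/* Pre-load the segment table entry covering a single effective address */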
static void cxl_prefault_one(struct cxl_context *ctx, u64 ea)
{
        struct mm_struct *mm;

        mm = get_mem_context(ctx);
        if (mm == NULL) {
                pr_devel("cxl_prefault_one unable to get mm %i\n",
                         pid_nr(ctx->pid));
                return;
        }

        cxl_fault_segment(ctx, mm, ea);

        mmput(mm);
}

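/*
 * Return the first effective address of the segment following the one
 * containing ea (1TB or 256MB segments, depending on the VSID B field).
 */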
static u64 next_segment(u64 ea, u64 vsid)
{
        if (vsid & SLB_VSID_B_1T)
                ea |= (1ULL << 40) - 1;
        else
                ea |= (1ULL << 28) - 1;

        return ea + 1;
}

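/*
 * Walk every VMA in the task's address space and pre-load a segment table
 * entry for each segment the VMAs cover.
 */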
static void cxl_prefault_vma(struct cxl_context *ctx)
{
        u64 ea, last_esid = 0;
        struct copro_slb slb;
        struct vm_area_struct *vma;
        int rc;
        struct mm_struct *mm;

        mm = get_mem_context(ctx);
        if (mm == NULL) {
                pr_devel("cxl_prefault_vma unable to get mm %i\n",
                         pid_nr(ctx->pid));
                return;
        }

        down_read(&mm->mmap_sem);
        for (vma = mm->mmap; vma; vma = vma->vm_next) {
                for (ea = vma->vm_start; ea < vma->vm_end;
                                ea = next_segment(ea, slb.vsid)) {
                        rc = copro_calculate_slb(mm, ea, &slb);
                        if (rc)
                                continue;

                        if (last_esid == slb.esid)
                                continue;

                        cxl_load_segment(ctx, &slb);
                        last_esid = slb.esid;
                }
        }
        up_read(&mm->mmap_sem);

        mmput(mm);
}

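/*
 * Optionally pre-load segment table entries, according to the AFU's
 * prefault_mode: just the segment containing the WED, or every segment
 * the task has mapped.
 */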
void cxl_prefault(struct cxl_context *ctx, u64 wed)
{
        switch (ctx->afu->prefault_mode) {
        case CXL_PREFAULT_WED:
                cxl_prefault_one(ctx, wed);
                break;
        case CXL_PREFAULT_ALL:
                cxl_prefault_vma(ctx);
                break;
        default:
                break;
        }
}