linux/drivers/iommu/io-pgfault.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Handle device page faults
 *
 * Copyright (C) 2020 ARM Ltd.
 */

#include <linux/iommu.h>
#include <linux/list.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

#include "iommu-sva-lib.h"

/**
 * struct iopf_queue - IO Page Fault queue
 * @wq: the fault workqueue
 * @devices: devices attached to this queue
 * @lock: protects the device list
 */
struct iopf_queue {
        struct workqueue_struct         *wq;
        struct list_head                devices;
        struct mutex                    lock;
};

/**
 * struct iopf_device_param - IO Page Fault data attached to a device
 * @dev: the device that owns this param
 * @queue: IOPF queue
 * @queue_list: index into queue->devices
 * @partial: faults that are part of a Page Request Group for which the last
 *           request hasn't been submitted yet.
 */
struct iopf_device_param {
        struct device                   *dev;
        struct iopf_queue               *queue;
        struct list_head                queue_list;
        struct list_head                partial;
};

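/*
 * One page request. Chains partial faults on iopf_device_param and the
 * members of a complete group on iopf_group.
 */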
struct iopf_fault {
        struct iommu_fault              fault;
        struct list_head                list;
};

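/*
 * A complete Page Request Group, handled as one unit of work. The last fault
 * is embedded so a response can always be sent for the group; earlier faults
 * are moved here from the device's partial list.
 */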
struct iopf_group {
        struct iopf_fault               last_fault;
        struct list_head                faults;
        struct work_struct              work;
        struct device                   *dev;
};

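/*
 * Send a Page Request Group Response for the group that @iopf terminates. The
 * PASID is only set in the response if the request carried one and the device
 * expects it there.
 */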
static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
                               enum iommu_page_response_code status)
{
        struct iommu_page_response resp = {
                .version                = IOMMU_PAGE_RESP_VERSION_1,
                .pasid                  = iopf->fault.prm.pasid,
                .grpid                  = iopf->fault.prm.grpid,
                .code                   = status,
        };

        if ((iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID) &&
            (iopf->fault.prm.flags & IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID))
                resp.flags = IOMMU_PAGE_RESP_PASID_VALID;

        return iommu_page_response(dev, &resp);
}

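/*
 * Handle one page request: find the mm bound to the faulting PASID, check
 * that the VMA allows the requested access, and let handle_mm_fault() resolve
 * it. Returns the response code to report to the device.
 */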
static enum iommu_page_response_code
iopf_handle_single(struct iopf_fault *iopf)
{
        vm_fault_t ret;
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        unsigned int access_flags = 0;
        unsigned int fault_flags = FAULT_FLAG_REMOTE;
        struct iommu_fault_page_request *prm = &iopf->fault.prm;
        enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;

        if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
                return status;

        mm = iommu_sva_find(prm->pasid);
        if (IS_ERR_OR_NULL(mm))
                return status;

        mmap_read_lock(mm);

        vma = find_extend_vma(mm, prm->addr);
        if (!vma)
                /* Unmapped area */
                goto out_put_mm;

        if (prm->perm & IOMMU_FAULT_PERM_READ)
                access_flags |= VM_READ;

        if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
                access_flags |= VM_WRITE;
                fault_flags |= FAULT_FLAG_WRITE;
        }

        if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
                access_flags |= VM_EXEC;
                fault_flags |= FAULT_FLAG_INSTRUCTION;
        }

        if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
                fault_flags |= FAULT_FLAG_USER;

        if (access_flags & ~vma->vm_flags)
                /* Access fault */
                goto out_put_mm;

        ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
        status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
                IOMMU_PAGE_RESP_SUCCESS;

out_put_mm:
        mmap_read_unlock(mm);
        mmput(mm);

        return status;
}

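/*
 * Work function: handle the faults of a Page Request Group in order, then
 * send a single response for the whole group and free it.
 */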
static void iopf_handle_group(struct work_struct *work)
{
        struct iopf_group *group;
        struct iopf_fault *iopf, *next;
        enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;

        group = container_of(work, struct iopf_group, work);

        list_for_each_entry_safe(iopf, next, &group->faults, list) {
                /*
                 * For the moment, errors are sticky: don't handle subsequent
                 * faults in the group if there is an error.
                 */
                if (status == IOMMU_PAGE_RESP_SUCCESS)
                        status = iopf_handle_single(iopf);

                if (!(iopf->fault.prm.flags &
                      IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
                        kfree(iopf);
        }

        iopf_complete_group(group->dev, &group->last_fault, status);
        kfree(group);
}

/**
 * iommu_queue_iopf - IO Page Fault handler
 * @fault: fault event
 * @cookie: struct device, passed to iommu_register_device_fault_handler.
 *
 * Add a fault to the device workqueue, to be handled by the mm subsystem.
 *
 * This module doesn't handle PCI PASID Stop Marker; IOMMU drivers must discard
 * them before reporting faults. A PASID Stop Marker (LRW = 0b100) doesn't
 * expect a response. It may be generated when disabling a PASID (issuing a
 * PASID stop request) by some PCI devices.
 *
 * The PASID stop request is issued by the device driver before unbind(). Once
 * it completes, no page request is generated for this PASID anymore and
 * outstanding ones have been pushed to the IOMMU (as per PCIe 4.0r1.0 - 6.20.1
 * and 10.4.1.2 - Managing PASID TLP Prefix Usage). Some PCI devices will wait
 * for all outstanding page requests to come back with a response before
 * completing the PASID stop request. Others do not wait for page responses, and
 * instead issue this Stop Marker that tells us when the PASID can be
 * reallocated.
 *
 * It is safe to discard the Stop Marker because it is an optimization:
 * a. Page requests, which are posted requests, have been flushed to the IOMMU
 *    when the stop request completes.
 * b. The IOMMU driver flushes all fault queues on unbind() before freeing the
 *    PASID.
 *
 * So even though the Stop Marker might be issued by the device *after* the stop
 * request completes, outstanding faults will have been dealt with by the time
 * the PASID is freed.
 *
 * Return: 0 on success and <0 on error.
 */
int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
{
        int ret;
        struct iopf_group *group;
        struct iopf_fault *iopf, *next;
        struct iopf_device_param *iopf_param;

        struct device *dev = cookie;
        struct dev_iommu *param = dev->iommu;

        lockdep_assert_held(&param->lock);

        if (fault->type != IOMMU_FAULT_PAGE_REQ)
                /* Not a recoverable page fault */
                return -EOPNOTSUPP;

        /*
         * As long as we're holding param->lock, the queue can't be unlinked
         * from the device and therefore cannot disappear.
         */
        iopf_param = param->iopf_param;
        if (!iopf_param)
                return -ENODEV;

        if (!(fault->prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) {
                iopf = kzalloc(sizeof(*iopf), GFP_KERNEL);
                if (!iopf)
                        return -ENOMEM;

                iopf->fault = *fault;

                /* Non-last request of a group. Postpone until the last one */
                list_add(&iopf->list, &iopf_param->partial);

                return 0;
        }

        group = kzalloc(sizeof(*group), GFP_KERNEL);
        if (!group) {
                /*
                 * The caller will send a response to the hardware. But we do
                 * need to clean up before leaving, otherwise partial faults
                 * will be stuck.
                 */
                ret = -ENOMEM;
                goto cleanup_partial;
        }

        group->dev = dev;
        group->last_fault.fault = *fault;
        INIT_LIST_HEAD(&group->faults);
        list_add(&group->last_fault.list, &group->faults);
        INIT_WORK(&group->work, iopf_handle_group);

        /* See if we have partial faults for this group */
        list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
                if (iopf->fault.prm.grpid == fault->prm.grpid)
                        /* Insert *before* the last fault */
                        list_move(&iopf->list, &group->faults);
        }

        queue_work(iopf_param->queue->wq, &group->work);
        return 0;

cleanup_partial:
        list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
                if (iopf->fault.prm.grpid == fault->prm.grpid) {
                        list_del(&iopf->list);
                        kfree(iopf);
                }
        }
        return ret;
}
EXPORT_SYMBOL_GPL(iommu_queue_iopf);
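
/*
 * Usage sketch (not part of this file): an IOMMU driver that supports
 * recoverable faults typically registers this function as the device fault
 * handler, passing the device itself as the cookie. Everything except the
 * iommu_*() and iopf_*() calls below is a hypothetical driver helper.
 *
 *      static int my_driver_enable_iopf(struct device *dev,
 *                                       struct iopf_queue *queue)
 *      {
 *              int ret;
 *
 *              ret = iopf_queue_add_device(queue, dev);
 *              if (ret)
 *                      return ret;
 *
 *              ret = iommu_register_device_fault_handler(dev, iommu_queue_iopf,
 *                                                        dev);
 *              if (ret)
 *                      iopf_queue_remove_device(queue, dev);
 *              return ret;
 *      }
 */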

/**
 * iopf_queue_flush_dev - Ensure that all queued faults have been processed
 * @dev: the endpoint whose faults need to be flushed.
 *
 * The IOMMU driver calls this before releasing a PASID, to ensure that all
 * pending faults for this PASID have been handled, and won't hit the address
 * space of the next process that uses this PASID. The driver must make sure
 * that no new fault is added to the queue. In particular it must flush its
 * low-level queue before calling this function.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_flush_dev(struct device *dev)
{
        int ret = 0;
        struct iopf_device_param *iopf_param;
        struct dev_iommu *param = dev->iommu;

        if (!param)
                return -ENODEV;

        mutex_lock(&param->lock);
        iopf_param = param->iopf_param;
        if (iopf_param)
                flush_workqueue(iopf_param->queue->wq);
        else
                ret = -ENODEV;
        mutex_unlock(&param->lock);

        return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_flush_dev);
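
/*
 * Usage sketch (illustrative; the my_driver_*() helpers are hypothetical):
 * on unbind, the IOMMU driver drains its low-level queue first so that no
 * new faults are queued, then flushes the workqueue before freeing the PASID.
 *
 *      static void my_driver_mm_release(struct device *dev, ioasid_t pasid)
 *      {
 *              my_driver_drain_hw_fault_queue(dev);
 *              iopf_queue_flush_dev(dev);
 *              my_driver_free_pasid(dev, pasid);
 *      }
 */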

/**
 * iopf_queue_discard_partial - Remove all pending partial faults
 * @queue: the queue whose partial faults need to be discarded
 *
 * When the hardware queue overflows, the last faults of a group may have been
 * lost and the IOMMU driver calls this to discard all partial faults. The
 * driver shouldn't be adding new faults to this queue concurrently.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_discard_partial(struct iopf_queue *queue)
{
        struct iopf_fault *iopf, *next;
        struct iopf_device_param *iopf_param;

        if (!queue)
                return -EINVAL;

        mutex_lock(&queue->lock);
        list_for_each_entry(iopf_param, &queue->devices, queue_list) {
                list_for_each_entry_safe(iopf, next, &iopf_param->partial,
                                         list) {
                        list_del(&iopf->list);
                        kfree(iopf);
                }
        }
        mutex_unlock(&queue->lock);
        return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_discard_partial);
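
/*
 * Usage sketch (helper names hypothetical): after a hardware PRI queue
 * overflow, last-page requests may have been dropped, so the driver resets
 * its queue and then throws away the orphaned partial faults.
 *
 *      static void my_driver_handle_priq_overflow(struct my_smmu *smmu)
 *      {
 *              my_driver_reset_priq(smmu);
 *              iopf_queue_discard_partial(smmu->iopf_queue);
 *      }
 */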

/**
 * iopf_queue_add_device - Add producer to the fault queue
 * @queue: IOPF queue
 * @dev: device to add
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev)
{
        int ret = -EBUSY;
        struct iopf_device_param *iopf_param;
        struct dev_iommu *param = dev->iommu;

        if (!param)
                return -ENODEV;

        iopf_param = kzalloc(sizeof(*iopf_param), GFP_KERNEL);
        if (!iopf_param)
                return -ENOMEM;

        INIT_LIST_HEAD(&iopf_param->partial);
        iopf_param->queue = queue;
        iopf_param->dev = dev;

        mutex_lock(&queue->lock);
        mutex_lock(&param->lock);
        if (!param->iopf_param) {
                list_add(&iopf_param->queue_list, &queue->devices);
                param->iopf_param = iopf_param;
                ret = 0;
        }
        mutex_unlock(&param->lock);
        mutex_unlock(&queue->lock);

        if (ret)
                kfree(iopf_param);

        return ret;
}
EXPORT_SYMBOL_GPL(iopf_queue_add_device);
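
/*
 * A device can only be attached to one queue at a time: a second call for a
 * device that is already attached fails with -EBUSY. Minimal sketch:
 *
 *      ret = iopf_queue_add_device(queue, dev);
 *      if (ret == -EBUSY)
 *              dev_warn(dev, "already attached to an IOPF queue\n");
 */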

/**
 * iopf_queue_remove_device - Remove producer from fault queue
 * @queue: IOPF queue
 * @dev: device to remove
 *
 * Caller makes sure that no more faults are reported for this device.
 *
 * Return: 0 on success and <0 on error.
 */
int iopf_queue_remove_device(struct iopf_queue *queue, struct device *dev)
{
        int ret = -EINVAL;
        struct iopf_fault *iopf, *next;
        struct iopf_device_param *iopf_param;
        struct dev_iommu *param = dev->iommu;

        if (!param || !queue)
                return -EINVAL;

        mutex_lock(&queue->lock);
        mutex_lock(&param->lock);
        iopf_param = param->iopf_param;
        if (iopf_param && iopf_param->queue == queue) {
                list_del(&iopf_param->queue_list);
                param->iopf_param = NULL;
                ret = 0;
        }
        mutex_unlock(&param->lock);
        mutex_unlock(&queue->lock);
        if (ret)
                return ret;

        /* Just in case some faults are still stuck */
        list_for_each_entry_safe(iopf, next, &iopf_param->partial, list)
                kfree(iopf);

        kfree(iopf_param);

        return 0;
}
EXPORT_SYMBOL_GPL(iopf_queue_remove_device);
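
/*
 * Usage sketch: teardown mirrors setup. Unregister the fault handler first so
 * that no new faults are queued for the device, then detach it.
 *
 *      iommu_unregister_device_fault_handler(dev);
 *      iopf_queue_remove_device(queue, dev);
 */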

/**
 * iopf_queue_alloc - Allocate and initialize a fault queue
 * @name: a unique string identifying the queue (for workqueue)
 *
 * Return: the queue on success and NULL on error.
 */
struct iopf_queue *iopf_queue_alloc(const char *name)
{
        struct iopf_queue *queue;

        queue = kzalloc(sizeof(*queue), GFP_KERNEL);
        if (!queue)
                return NULL;

        /*
         * The WQ is unordered because the low-level handler enqueues faults by
         * group. PRI requests within a group have to be ordered, but once
         * that's dealt with, the high-level function can handle groups out of
         * order.
         */
        queue->wq = alloc_workqueue("iopf_queue/%s", WQ_UNBOUND, 0, name);
        if (!queue->wq) {
                kfree(queue);
                return NULL;
        }

        INIT_LIST_HEAD(&queue->devices);
        mutex_init(&queue->lock);

        return queue;
}
EXPORT_SYMBOL_GPL(iopf_queue_alloc);
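
/*
 * Minimal sketch (the smmu pointer is hypothetical): the name only labels the
 * workqueue, which shows up as "iopf_queue/<name>".
 *
 *      smmu->iopf_queue = iopf_queue_alloc(dev_name(smmu->dev));
 *      if (!smmu->iopf_queue)
 *              return -ENOMEM;
 */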

/**
 * iopf_queue_free - Free IOPF queue
 * @queue: queue to free
 *
 * Counterpart to iopf_queue_alloc(). The driver must not be queuing faults or
 * adding/removing devices on this queue anymore.
 */
void iopf_queue_free(struct iopf_queue *queue)
{
        struct iopf_device_param *iopf_param, *next;

        if (!queue)
                return;

        list_for_each_entry_safe(iopf_param, next, &queue->devices, queue_list)
                iopf_queue_remove_device(queue, iopf_param->dev);

        destroy_workqueue(queue->wq);
        kfree(queue);
}
EXPORT_SYMBOL_GPL(iopf_queue_free);
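
/*
 * Teardown sketch (the smmu pointer is hypothetical): devices still attached
 * are removed by iopf_queue_free() itself, so a driver that has stopped
 * reporting faults can simply free the queue on exit.
 *
 *      iopf_queue_free(smmu->iopf_queue);
 *      smmu->iopf_queue = NULL;
 */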