linux/drivers/s390/crypto/vfio_ap_ops.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0+
   2/*
   3 * Adjunct processor matrix VFIO device driver callbacks.
   4 *
   5 * Copyright IBM Corp. 2018
   6 *
   7 * Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
   8 *            Halil Pasic <pasic@linux.ibm.com>
   9 *            Pierre Morel <pmorel@linux.ibm.com>
  10 */
  11#include <linux/string.h>
  12#include <linux/vfio.h>
  13#include <linux/device.h>
  14#include <linux/list.h>
  15#include <linux/ctype.h>
  16#include <linux/bitops.h>
  17#include <linux/kvm_host.h>
  18#include <linux/module.h>
  19#include <linux/uuid.h>
  20#include <asm/kvm.h>
  21#include <asm/zcrypt.h>
  22
  23#include "vfio_ap_private.h"
  24#include "vfio_ap_debug.h"
  25
  26#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
  27#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"
  28
  29static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev);
  30static struct vfio_ap_queue *vfio_ap_find_queue(int apqn);
  31static const struct vfio_device_ops vfio_ap_matrix_dev_ops;
  32
  33static int match_apqn(struct device *dev, const void *data)
  34{
  35        struct vfio_ap_queue *q = dev_get_drvdata(dev);
  36
  37        return (q->apqn == *(int *)(data)) ? 1 : 0;
  38}
  39
  40/**
  41 * vfio_ap_get_queue - retrieve a queue with a specific APQN from a list
  42 * @matrix_mdev: the associated mediated matrix
  43 * @apqn: The queue APQN
  44 *
  45 * Retrieve a queue with a specific APQN from the list of the
  46 * devices of the vfio_ap_drv.
  47 * Verify that the APID and the APQI are set in the matrix.
  48 *
  49 * Return: the pointer to the associated vfio_ap_queue
  50 */
  51static struct vfio_ap_queue *vfio_ap_get_queue(
  52                                        struct ap_matrix_mdev *matrix_mdev,
  53                                        int apqn)
  54{
  55        struct vfio_ap_queue *q;
  56
  57        if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
  58                return NULL;
  59        if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
  60                return NULL;
  61
  62        q = vfio_ap_find_queue(apqn);
  63        if (q)
  64                q->matrix_mdev = matrix_mdev;
  65
  66        return q;
  67}
  68
  69/**
  70 * vfio_ap_wait_for_irqclear - clears the IR bit or gives up after 5 tries
  71 * @apqn: The AP Queue number
  72 *
  73 * Checks the IRQ bit for the status of this APQN using ap_tapq.
  74 * Returns if the ap_tapq function succeeded and the bit is clear.
  75 * Returns if ap_tapq function failed with invalid, deconfigured or
  76 * checkstopped AP.
  77 * Otherwise retries up to 5 times after waiting 20ms.
  78 */
  79static void vfio_ap_wait_for_irqclear(int apqn)
  80{
  81        struct ap_queue_status status;
  82        int retry = 5;
  83
  84        do {
  85                status = ap_tapq(apqn, NULL);
  86                switch (status.response_code) {
  87                case AP_RESPONSE_NORMAL:
  88                case AP_RESPONSE_RESET_IN_PROGRESS:
  89                        if (!status.irq_enabled)
  90                                return;
  91                        fallthrough;
  92                case AP_RESPONSE_BUSY:
  93                        msleep(20);
  94                        break;
  95                case AP_RESPONSE_Q_NOT_AVAIL:
  96                case AP_RESPONSE_DECONFIGURED:
  97                case AP_RESPONSE_CHECKSTOPPED:
  98                default:
  99                        WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__,
 100                                  status.response_code, apqn);
 101                        return;
 102                }
 103        } while (--retry);
 104
 105        WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n",
 106                  __func__, status.response_code, apqn);
 107}
 108
 109/**
 110 * vfio_ap_free_aqic_resources - free vfio_ap_queue resources
 111 * @q: The vfio_ap_queue
 112 *
 113 * Unregisters the ISC in the GIB when the saved ISC not invalid.
 114 * Unpins the guest's page holding the NIB when it exists.
 115 * Resets the saved_pfn and saved_isc to invalid values.
 116 */
 117static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
 118{
 119        if (!q)
 120                return;
 121        if (q->saved_isc != VFIO_AP_ISC_INVALID &&
 122            !WARN_ON(!(q->matrix_mdev && q->matrix_mdev->kvm))) {
 123                kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc);
 124                q->saved_isc = VFIO_AP_ISC_INVALID;
 125        }
 126        if (q->saved_pfn && !WARN_ON(!q->matrix_mdev)) {
 127                vfio_unpin_pages(&q->matrix_mdev->vdev, &q->saved_pfn, 1);
 128                q->saved_pfn = 0;
 129        }
 130}
 131
 132/**
 133 * vfio_ap_irq_disable - disables and clears an ap_queue interrupt
 134 * @q: The vfio_ap_queue
 135 *
 136 * Uses ap_aqic to disable the interruption and in case of success, reset
 137 * in progress or IRQ disable command already proceeded: calls
 138 * vfio_ap_wait_for_irqclear() to check for the IRQ bit to be clear
 139 * and calls vfio_ap_free_aqic_resources() to free the resources associated
 140 * with the AP interrupt handling.
 141 *
 142 * In the case the AP is busy, or a reset is in progress,
 143 * retries after 20ms, up to 5 times.
 144 *
 145 * Returns if ap_aqic function failed with invalid, deconfigured or
 146 * checkstopped AP.
 147 *
 148 * Return: &struct ap_queue_status
 149 */
 150static struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
 151{
 152        struct ap_qirq_ctrl aqic_gisa = {};
 153        struct ap_queue_status status;
 154        int retries = 5;
 155
 156        do {
 157                status = ap_aqic(q->apqn, aqic_gisa, NULL);
 158                switch (status.response_code) {
 159                case AP_RESPONSE_OTHERWISE_CHANGED:
 160                case AP_RESPONSE_NORMAL:
 161                        vfio_ap_wait_for_irqclear(q->apqn);
 162                        goto end_free;
 163                case AP_RESPONSE_RESET_IN_PROGRESS:
 164                case AP_RESPONSE_BUSY:
 165                        msleep(20);
 166                        break;
 167                case AP_RESPONSE_Q_NOT_AVAIL:
 168                case AP_RESPONSE_DECONFIGURED:
 169                case AP_RESPONSE_CHECKSTOPPED:
 170                case AP_RESPONSE_INVALID_ADDRESS:
 171                default:
 172                        /* All cases in default means AP not operational */
 173                        WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
 174                                  status.response_code);
 175                        goto end_free;
 176                }
 177        } while (retries--);
 178
 179        WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
 180                  status.response_code);
 181end_free:
 182        vfio_ap_free_aqic_resources(q);
 183        q->matrix_mdev = NULL;
 184        return status;
 185}
 186
 187/**
 188 * vfio_ap_validate_nib - validate a notification indicator byte (nib) address.
 189 *
 190 * @vcpu: the object representing the vcpu executing the PQAP(AQIC) instruction.
 191 * @nib: the location for storing the nib address.
 192 * @g_pfn: the location for storing the page frame number of the page containing
 193 *         the nib.
 194 *
 195 * When the PQAP(AQIC) instruction is executed, general register 2 contains the
 196 * address of the notification indicator byte (nib) used for IRQ notification.
 197 * This function parses the nib from gr2 and calculates the page frame
 198 * number for the guest of the page containing the nib. The values are
 199 * stored in @nib and @g_pfn respectively.
 200 *
 201 * The g_pfn of the nib is then validated to ensure the nib address is valid.
 202 *
 203 * Return: returns zero if the nib address is a valid; otherwise, returns
 204 *         -EINVAL.
 205 */
 206static int vfio_ap_validate_nib(struct kvm_vcpu *vcpu, unsigned long *nib,
 207                                unsigned long *g_pfn)
 208{
 209        *nib = vcpu->run->s.regs.gprs[2];
 210        *g_pfn = *nib >> PAGE_SHIFT;
 211
 212        if (kvm_is_error_hva(gfn_to_hva(vcpu->kvm, *g_pfn)))
 213                return -EINVAL;
 214
 215        return 0;
 216}
 217
 218/**
 219 * vfio_ap_irq_enable - Enable Interruption for a APQN
 220 *
 221 * @q:   the vfio_ap_queue holding AQIC parameters
 222 * @isc: the guest ISC to register with the GIB interface
 223 * @vcpu: the vcpu object containing the registers specifying the parameters
 224 *        passed to the PQAP(AQIC) instruction.
 225 *
 226 * Pin the NIB saved in *q
 227 * Register the guest ISC to GIB interface and retrieve the
 228 * host ISC to issue the host side PQAP/AQIC
 229 *
 230 * Response.status may be set to AP_RESPONSE_INVALID_ADDRESS in case the
 231 * vfio_pin_pages failed.
 232 *
 233 * Otherwise return the ap_queue_status returned by the ap_aqic(),
 234 * all retry handling will be done by the guest.
 235 *
 236 * Return: &struct ap_queue_status
 237 */
 238static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
 239                                                 int isc,
 240                                                 struct kvm_vcpu *vcpu)
 241{
 242        unsigned long nib;
 243        struct ap_qirq_ctrl aqic_gisa = {};
 244        struct ap_queue_status status = {};
 245        struct kvm_s390_gisa *gisa;
 246        int nisc;
 247        struct kvm *kvm;
 248        unsigned long h_nib, g_pfn, h_pfn;
 249        int ret;
 250
 251        /* Verify that the notification indicator byte address is valid */
 252        if (vfio_ap_validate_nib(vcpu, &nib, &g_pfn)) {
 253                VFIO_AP_DBF_WARN("%s: invalid NIB address: nib=%#lx, g_pfn=%#lx, apqn=%#04x\n",
 254                                 __func__, nib, g_pfn, q->apqn);
 255
 256                status.response_code = AP_RESPONSE_INVALID_ADDRESS;
 257                return status;
 258        }
 259
 260        ret = vfio_pin_pages(&q->matrix_mdev->vdev, &g_pfn, 1,
 261                             IOMMU_READ | IOMMU_WRITE, &h_pfn);
 262        switch (ret) {
 263        case 1:
 264                break;
 265        default:
 266                VFIO_AP_DBF_WARN("%s: vfio_pin_pages failed: rc=%d,"
 267                                 "nib=%#lx, g_pfn=%#lx, apqn=%#04x\n",
 268                                 __func__, ret, nib, g_pfn, q->apqn);
 269
 270                status.response_code = AP_RESPONSE_INVALID_ADDRESS;
 271                return status;
 272        }
 273
 274        kvm = q->matrix_mdev->kvm;
 275        gisa = kvm->arch.gisa_int.origin;
 276
 277        h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK);
 278        aqic_gisa.gisc = isc;
 279
 280        nisc = kvm_s390_gisc_register(kvm, isc);
 281        if (nisc < 0) {
 282                VFIO_AP_DBF_WARN("%s: gisc registration failed: nisc=%d, isc=%d, apqn=%#04x\n",
 283                                 __func__, nisc, isc, q->apqn);
 284
 285                status.response_code = AP_RESPONSE_INVALID_GISA;
 286                return status;
 287        }
 288
 289        aqic_gisa.isc = nisc;
 290        aqic_gisa.ir = 1;
 291        aqic_gisa.gisa = (uint64_t)gisa >> 4;
 292
 293        status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib);
 294        switch (status.response_code) {
 295        case AP_RESPONSE_NORMAL:
 296                /* See if we did clear older IRQ configuration */
 297                vfio_ap_free_aqic_resources(q);
 298                q->saved_pfn = g_pfn;
 299                q->saved_isc = isc;
 300                break;
 301        case AP_RESPONSE_OTHERWISE_CHANGED:
 302                /* We could not modify IRQ setings: clear new configuration */
 303                vfio_unpin_pages(&q->matrix_mdev->vdev, &g_pfn, 1);
 304                kvm_s390_gisc_unregister(kvm, isc);
 305                break;
 306        default:
 307                pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
 308                        status.response_code);
 309                vfio_ap_irq_disable(q);
 310                break;
 311        }
 312
 313        if (status.response_code != AP_RESPONSE_NORMAL) {
 314                VFIO_AP_DBF_WARN("%s: PQAP(AQIC) failed with status=%#02x: "
 315                                 "zone=%#x, ir=%#x, gisc=%#x, f=%#x,"
 316                                 "gisa=%#x, isc=%#x, apqn=%#04x\n",
 317                                 __func__, status.response_code,
 318                                 aqic_gisa.zone, aqic_gisa.ir, aqic_gisa.gisc,
 319                                 aqic_gisa.gf, aqic_gisa.gisa, aqic_gisa.isc,
 320                                 q->apqn);
 321        }
 322
 323        return status;
 324}
 325
/**
 * vfio_ap_le_guid_to_be_uuid - convert a little endian guid array into an array
 *                              of big endian elements that can be passed by
 *                              value to an s390dbf sprintf event function to
 *                              format a UUID string.
 *
 * @guid: the object containing the little endian guid
 * @uuid: a six-element array of long values that can be passed by value as
 *        arguments for a formatting string specifying a UUID.
 *
 * The S390 Debug Feature (s390dbf) allows the use of "%s" in the sprintf
 * event functions only if the memory for the passed string is available as
 * long as the debug feature exists. Since a mediated device can be removed at
 * any time, its name can not be used because %s passes the reference to the
 * string in memory and the reference will go stale once the device is removed.
 *
 * The s390dbf string formatting function allows a maximum of 9 arguments for a
 * message to be displayed in the 'sprintf' view. In order to use the bytes
 * comprising the mediated device's UUID to display the mediated device name,
 * they will have to be converted into an array whose elements can be passed by
 * value to sprintf. For example:
 *
 * guid array: { 83, 78, 17, 62, bb, f1, f0, 47, 91, 4d, 32, a2, 2e, 3a, 88, 04 }
 * mdev name: 62177883-f1bb-47f0-914d-32a22e3a8804
 * array returned: { 62177883, f1bb, 47f0, 914d, 32a2, 2e3a8804 }
 * formatting string: "%08lx-%04lx-%04lx-%04lx-%02lx%04lx"
 */
static void vfio_ap_le_guid_to_be_uuid(guid_t *guid, unsigned long *uuid)
{
	/*
	 * The input guid is ordered in little endian, so it needs to be
	 * reordered for displaying a UUID as a string. This specifies the
	 * guid indices in proper order.
	 */
	/* First three UUID fields are stored little endian in guid_t. */
	uuid[0] = le32_to_cpup((__le32 *)guid);
	uuid[1] = le16_to_cpup((__le16 *)&guid->b[4]);
	uuid[2] = le16_to_cpup((__le16 *)&guid->b[6]);
	/* The clock-seq and node bytes are taken as stored (no swap). */
	uuid[3] = *((__u16 *)&guid->b[8]);
	uuid[4] = *((__u16 *)&guid->b[10]);
	uuid[5] = *((__u32 *)&guid->b[12]);
}
 367
/**
 * handle_pqap - PQAP instruction callback
 *
 * @vcpu: The vcpu on which we received the PQAP instruction
 *
 * Get the general register contents to initialize internal variables.
 * REG[0]: APQN
 * REG[1]: IR and ISC
 * REG[2]: NIB
 *
 * Response.status may be set to following Response Code:
 * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available
 * - AP_RESPONSE_DECONFIGURED: if the queue is not configured
 * - AP_RESPONSE_NORMAL (0) : in case of success
 *   Check vfio_ap_setirq() and vfio_ap_clrirq() for other possible RC.
 * We take the matrix_dev lock to ensure serialization on queues and
 * mediated device access.
 *
 * Return: 0 if we could handle the request inside KVM.
 * Otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
 */
static int handle_pqap(struct kvm_vcpu *vcpu)
{
	uint64_t status;
	uint16_t apqn;
	unsigned long uuid[6];
	struct vfio_ap_queue *q;
	/* Default response if no matching bound queue is found below. */
	struct ap_queue_status qstatus = {
			       .response_code = AP_RESPONSE_Q_NOT_AVAIL, };
	struct ap_matrix_mdev *matrix_mdev;

	/* APQN is in the low 16 bits of general register 0. */
	apqn = vcpu->run->s.regs.gprs[0] & 0xffff;

	/* If we do not use the AIV facility just go to userland */
	if (!(vcpu->arch.sie_block->eca & ECA_AIV)) {
		VFIO_AP_DBF_WARN("%s: AIV facility not installed: apqn=0x%04x, eca=0x%04x\n",
				 __func__, apqn, vcpu->arch.sie_block->eca);

		return -EOPNOTSUPP;
	}

	mutex_lock(&matrix_dev->lock);
	if (!vcpu->kvm->arch.crypto.pqap_hook) {
		VFIO_AP_DBF_WARN("%s: PQAP(AQIC) hook not registered with the vfio_ap driver: apqn=0x%04x\n",
				 __func__, apqn);
		goto out_unlock;
	}

	/* Recover the owning mdev from the hook registered with KVM. */
	matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
				   struct ap_matrix_mdev, pqap_hook);

	/* If there is no guest using the mdev, there is nothing to do */
	if (!matrix_mdev->kvm) {
		/* UUID converted by value so the s390dbf record can't go stale. */
		vfio_ap_le_guid_to_be_uuid(&matrix_mdev->mdev->uuid, uuid);
		VFIO_AP_DBF_WARN("%s: mdev %08lx-%04lx-%04lx-%04lx-%04lx%08lx not in use: apqn=0x%04x\n",
				 __func__, uuid[0],  uuid[1], uuid[2],
				 uuid[3], uuid[4], uuid[5], apqn);
		goto out_unlock;
	}

	q = vfio_ap_get_queue(matrix_mdev, apqn);
	if (!q) {
		VFIO_AP_DBF_WARN("%s: Queue %02x.%04x not bound to the vfio_ap driver\n",
				 __func__, AP_QID_CARD(apqn),
				 AP_QID_QUEUE(apqn));
		goto out_unlock;
	}

	status = vcpu->run->s.regs.gprs[1];

	/* If IR bit(16) is set we enable the interrupt */
	if ((status >> (63 - 16)) & 0x01)
		qstatus = vfio_ap_irq_enable(q, status & 0x07, vcpu);
	else
		qstatus = vfio_ap_irq_disable(q);

out_unlock:
	/* Place the AP queue status in the leftmost 32 bits of gr1. */
	memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
	vcpu->run->s.regs.gprs[1] >>= 32;
	mutex_unlock(&matrix_dev->lock);
	return 0;
}
 450
 451static void vfio_ap_matrix_init(struct ap_config_info *info,
 452                                struct ap_matrix *matrix)
 453{
 454        matrix->apm_max = info->apxa ? info->Na : 63;
 455        matrix->aqm_max = info->apxa ? info->Nd : 15;
 456        matrix->adm_max = info->apxa ? info->Nd : 15;
 457}
 458
/*
 * vfio_ap_mdev_probe - mdev bus probe callback: allocate and register a
 * mediated matrix device for @mdev.
 *
 * Claims an instance slot, allocates the per-mdev state, initializes the
 * matrix limits and the PQAP hook, links the device into the global mdev
 * list and registers it with VFIO. Each failure unwinds exactly the steps
 * taken before it (reverse order).
 *
 * Return: 0 on success; -EPERM when no instance slot is available, -ENOMEM
 * on allocation failure, or the error from VFIO registration.
 */
static int vfio_ap_mdev_probe(struct mdev_device *mdev)
{
	struct ap_matrix_mdev *matrix_mdev;
	int ret;

	/* Atomically claim one of the limited instance slots. */
	if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
		return -EPERM;

	matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL);
	if (!matrix_mdev) {
		ret = -ENOMEM;
		goto err_dec_available;
	}
	vfio_init_group_dev(&matrix_mdev->vdev, &mdev->dev,
			    &vfio_ap_matrix_dev_ops);

	matrix_mdev->mdev = mdev;
	vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
	matrix_mdev->pqap_hook = handle_pqap;
	/* The global mdev list is protected by the matrix device lock. */
	mutex_lock(&matrix_dev->lock);
	list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
	mutex_unlock(&matrix_dev->lock);

	ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev);
	if (ret)
		goto err_list;
	dev_set_drvdata(&mdev->dev, matrix_mdev);
	return 0;

err_list:
	mutex_lock(&matrix_dev->lock);
	list_del(&matrix_mdev->node);
	mutex_unlock(&matrix_dev->lock);
	vfio_uninit_group_dev(&matrix_mdev->vdev);
	kfree(matrix_mdev);
err_dec_available:
	/* Return the claimed instance slot. */
	atomic_inc(&matrix_dev->available_instances);
	return ret;
}
 498
/*
 * vfio_ap_mdev_remove - mdev bus remove callback: tear down the mediated
 * matrix device in the reverse order of probe.
 *
 * The VFIO device is unregistered first so no new user can open it; the
 * queues are then reset and the device is unlinked under the matrix device
 * lock before the state is freed and the instance slot returned.
 */
static void vfio_ap_mdev_remove(struct mdev_device *mdev)
{
	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev);

	vfio_unregister_group_dev(&matrix_mdev->vdev);

	mutex_lock(&matrix_dev->lock);
	/* Reset queues before the mdev disappears from the global list. */
	vfio_ap_mdev_reset_queues(matrix_mdev);
	list_del(&matrix_mdev->node);
	mutex_unlock(&matrix_dev->lock);
	vfio_uninit_group_dev(&matrix_mdev->vdev);
	kfree(matrix_mdev);
	atomic_inc(&matrix_dev->available_instances);
}
 513
 514static ssize_t name_show(struct mdev_type *mtype,
 515                         struct mdev_type_attribute *attr, char *buf)
 516{
 517        return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT);
 518}
 519
 520static MDEV_TYPE_ATTR_RO(name);
 521
 522static ssize_t available_instances_show(struct mdev_type *mtype,
 523                                        struct mdev_type_attribute *attr,
 524                                        char *buf)
 525{
 526        return sprintf(buf, "%d\n",
 527                       atomic_read(&matrix_dev->available_instances));
 528}
 529
 530static MDEV_TYPE_ATTR_RO(available_instances);
 531
 532static ssize_t device_api_show(struct mdev_type *mtype,
 533                               struct mdev_type_attribute *attr, char *buf)
 534{
 535        return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING);
 536}
 537
 538static MDEV_TYPE_ATTR_RO(device_api);
 539
/* sysfs attributes exported for the "passthrough" mdev type. */
static struct attribute *vfio_ap_mdev_type_attrs[] = {
	&mdev_type_attr_name.attr,
	&mdev_type_attr_device_api.attr,
	&mdev_type_attr_available_instances.attr,
	NULL,
};

/* Attribute group for the single supported mdev type. */
static struct attribute_group vfio_ap_mdev_hwvirt_type_group = {
	.name = VFIO_AP_MDEV_TYPE_HWVIRT,
	.attrs = vfio_ap_mdev_type_attrs,
};

/* NULL-terminated list of all mdev type groups of this driver. */
static struct attribute_group *vfio_ap_mdev_type_groups[] = {
	&vfio_ap_mdev_hwvirt_type_group,
	NULL,
};
 556
/*
 * Search parameters and result for vfio_ap_has_queue(): @apid and/or @apqi
 * select what to match (either may be NULL, not both), and @reserved is set
 * when a matching queue device bound to the vfio_ap driver is found.
 */
struct vfio_ap_queue_reserved {
	unsigned long *apid;
	unsigned long *apqi;
	bool reserved;
};
 562
 563/**
 564 * vfio_ap_has_queue - determines if the AP queue containing the target in @data
 565 *
 566 * @dev: an AP queue device
 567 * @data: a struct vfio_ap_queue_reserved reference
 568 *
 569 * Flags whether the AP queue device (@dev) has a queue ID containing the APQN,
 570 * apid or apqi specified in @data:
 571 *
 572 * - If @data contains both an apid and apqi value, then @data will be flagged
 573 *   as reserved if the APID and APQI fields for the AP queue device matches
 574 *
 575 * - If @data contains only an apid value, @data will be flagged as
 576 *   reserved if the APID field in the AP queue device matches
 577 *
 578 * - If @data contains only an apqi value, @data will be flagged as
 579 *   reserved if the APQI field in the AP queue device matches
 580 *
 581 * Return: 0 to indicate the input to function succeeded. Returns -EINVAL if
 582 * @data does not contain either an apid or apqi.
 583 */
 584static int vfio_ap_has_queue(struct device *dev, void *data)
 585{
 586        struct vfio_ap_queue_reserved *qres = data;
 587        struct ap_queue *ap_queue = to_ap_queue(dev);
 588        ap_qid_t qid;
 589        unsigned long id;
 590
 591        if (qres->apid && qres->apqi) {
 592                qid = AP_MKQID(*qres->apid, *qres->apqi);
 593                if (qid == ap_queue->qid)
 594                        qres->reserved = true;
 595        } else if (qres->apid && !qres->apqi) {
 596                id = AP_QID_CARD(ap_queue->qid);
 597                if (id == *qres->apid)
 598                        qres->reserved = true;
 599        } else if (!qres->apid && qres->apqi) {
 600                id = AP_QID_QUEUE(ap_queue->qid);
 601                if (id == *qres->apqi)
 602                        qres->reserved = true;
 603        } else {
 604                return -EINVAL;
 605        }
 606
 607        return 0;
 608}
 609
 610/**
 611 * vfio_ap_verify_queue_reserved - verifies that the AP queue containing
 612 * @apid or @aqpi is reserved
 613 *
 614 * @apid: an AP adapter ID
 615 * @apqi: an AP queue index
 616 *
 617 * Verifies that the AP queue with @apid/@apqi is reserved by the VFIO AP device
 618 * driver according to the following rules:
 619 *
 620 * - If both @apid and @apqi are not NULL, then there must be an AP queue
 621 *   device bound to the vfio_ap driver with the APQN identified by @apid and
 622 *   @apqi
 623 *
 624 * - If only @apid is not NULL, then there must be an AP queue device bound
 625 *   to the vfio_ap driver with an APQN containing @apid
 626 *
 627 * - If only @apqi is not NULL, then there must be an AP queue device bound
 628 *   to the vfio_ap driver with an APQN containing @apqi
 629 *
 630 * Return: 0 if the AP queue is reserved; otherwise, returns -EADDRNOTAVAIL.
 631 */
 632static int vfio_ap_verify_queue_reserved(unsigned long *apid,
 633                                         unsigned long *apqi)
 634{
 635        int ret;
 636        struct vfio_ap_queue_reserved qres;
 637
 638        qres.apid = apid;
 639        qres.apqi = apqi;
 640        qres.reserved = false;
 641
 642        ret = driver_for_each_device(&matrix_dev->vfio_ap_drv->driver, NULL,
 643                                     &qres, vfio_ap_has_queue);
 644        if (ret)
 645                return ret;
 646
 647        if (qres.reserved)
 648                return 0;
 649
 650        return -EADDRNOTAVAIL;
 651}
 652
/*
 * Verify that every APQN formed from @apid and each APQI assigned to
 * @matrix_mdev is reserved by (bound to) the vfio_ap driver; if no APQIs
 * are assigned yet, verify the APID alone. Returns 0 when all checks pass,
 * otherwise the first error from vfio_ap_verify_queue_reserved().
 */
static int
vfio_ap_mdev_verify_queues_reserved_for_apid(struct ap_matrix_mdev *matrix_mdev,
					     unsigned long apid)
{
	int ret;
	unsigned long apqi;
	unsigned long nbits = matrix_mdev->matrix.aqm_max + 1;

	/* Empty AQM: no cross product to check, verify the APID alone. */
	if (find_first_bit_inv(matrix_mdev->matrix.aqm, nbits) >= nbits)
		return vfio_ap_verify_queue_reserved(&apid, NULL);

	for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, nbits) {
		ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
		if (ret)
			return ret;
	}

	return 0;
}
 672
 673/**
 674 * vfio_ap_mdev_verify_no_sharing - verifies that the AP matrix is not configured
 675 *
 676 * @matrix_mdev: the mediated matrix device
 677 *
 678 * Verifies that the APQNs derived from the cross product of the AP adapter IDs
 679 * and AP queue indexes comprising the AP matrix are not configured for another
 680 * mediated device. AP queue sharing is not allowed.
 681 *
 682 * Return: 0 if the APQNs are not shared; otherwise returns -EADDRINUSE.
 683 */
 684static int vfio_ap_mdev_verify_no_sharing(struct ap_matrix_mdev *matrix_mdev)
 685{
 686        struct ap_matrix_mdev *lstdev;
 687        DECLARE_BITMAP(apm, AP_DEVICES);
 688        DECLARE_BITMAP(aqm, AP_DOMAINS);
 689
 690        list_for_each_entry(lstdev, &matrix_dev->mdev_list, node) {
 691                if (matrix_mdev == lstdev)
 692                        continue;
 693
 694                memset(apm, 0, sizeof(apm));
 695                memset(aqm, 0, sizeof(aqm));
 696
 697                /*
 698                 * We work on full longs, as we can only exclude the leftover
 699                 * bits in non-inverse order. The leftover is all zeros.
 700                 */
 701                if (!bitmap_and(apm, matrix_mdev->matrix.apm,
 702                                lstdev->matrix.apm, AP_DEVICES))
 703                        continue;
 704
 705                if (!bitmap_and(aqm, matrix_mdev->matrix.aqm,
 706                                lstdev->matrix.aqm, AP_DOMAINS))
 707                        continue;
 708
 709                return -EADDRINUSE;
 710        }
 711
 712        return 0;
 713}
 714
 715/**
 716 * assign_adapter_store - parses the APID from @buf and sets the
 717 * corresponding bit in the mediated matrix device's APM
 718 *
 719 * @dev:        the matrix device
 720 * @attr:       the mediated matrix device's assign_adapter attribute
 721 * @buf:        a buffer containing the AP adapter number (APID) to
 722 *              be assigned
 723 * @count:      the number of bytes in @buf
 724 *
 725 * Return: the number of bytes processed if the APID is valid; otherwise,
 726 * returns one of the following errors:
 727 *
 728 *      1. -EINVAL
 729 *         The APID is not a valid number
 730 *
 731 *      2. -ENODEV
 732 *         The APID exceeds the maximum value configured for the system
 733 *
 734 *      3. -EADDRNOTAVAIL
 735 *         An APQN derived from the cross product of the APID being assigned
 736 *         and the APQIs previously assigned is not bound to the vfio_ap device
 737 *         driver; or, if no APQIs have yet been assigned, the APID is not
 738 *         contained in an APQN bound to the vfio_ap device driver.
 739 *
 740 *      4. -EADDRINUSE
 741 *         An APQN derived from the cross product of the APID being assigned
 742 *         and the APQIs previously assigned is being used by another mediated
 743 *         matrix device
 744 */
 745static ssize_t assign_adapter_store(struct device *dev,
 746                                    struct device_attribute *attr,
 747                                    const char *buf, size_t count)
 748{
 749        int ret;
 750        unsigned long apid;
 751        struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
 752
 753        mutex_lock(&matrix_dev->lock);
 754
 755        /* If the KVM guest is running, disallow assignment of adapter */
 756        if (matrix_mdev->kvm) {
 757                ret = -EBUSY;
 758                goto done;
 759        }
 760
 761        ret = kstrtoul(buf, 0, &apid);
 762        if (ret)
 763                goto done;
 764
 765        if (apid > matrix_mdev->matrix.apm_max) {
 766                ret = -ENODEV;
 767                goto done;
 768        }
 769
 770        /*
 771         * Set the bit in the AP mask (APM) corresponding to the AP adapter
 772         * number (APID). The bits in the mask, from most significant to least
 773         * significant bit, correspond to APIDs 0-255.
 774         */
 775        ret = vfio_ap_mdev_verify_queues_reserved_for_apid(matrix_mdev, apid);
 776        if (ret)
 777                goto done;
 778
 779        set_bit_inv(apid, matrix_mdev->matrix.apm);
 780
 781        ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
 782        if (ret)
 783                goto share_err;
 784
 785        ret = count;
 786        goto done;
 787
 788share_err:
 789        clear_bit_inv(apid, matrix_mdev->matrix.apm);
 790done:
 791        mutex_unlock(&matrix_dev->lock);
 792
 793        return ret;
 794}
 795static DEVICE_ATTR_WO(assign_adapter);
 796
 797/**
 798 * unassign_adapter_store - parses the APID from @buf and clears the
 799 * corresponding bit in the mediated matrix device's APM
 800 *
 801 * @dev:        the matrix device
 802 * @attr:       the mediated matrix device's unassign_adapter attribute
 803 * @buf:        a buffer containing the adapter number (APID) to be unassigned
 804 * @count:      the number of bytes in @buf
 805 *
 806 * Return: the number of bytes processed if the APID is valid; otherwise,
 807 * returns one of the following errors:
 808 *      -EINVAL if the APID is not a number
 809 *      -ENODEV if the APID it exceeds the maximum value configured for the
 810 *              system
 811 */
 812static ssize_t unassign_adapter_store(struct device *dev,
 813                                      struct device_attribute *attr,
 814                                      const char *buf, size_t count)
 815{
 816        int ret;
 817        unsigned long apid;
 818        struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
 819
 820        mutex_lock(&matrix_dev->lock);
 821
 822        /* If the KVM guest is running, disallow unassignment of adapter */
 823        if (matrix_mdev->kvm) {
 824                ret = -EBUSY;
 825                goto done;
 826        }
 827
 828        ret = kstrtoul(buf, 0, &apid);
 829        if (ret)
 830                goto done;
 831
 832        if (apid > matrix_mdev->matrix.apm_max) {
 833                ret = -ENODEV;
 834                goto done;
 835        }
 836
 837        clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm);
 838        ret = count;
 839done:
 840        mutex_unlock(&matrix_dev->lock);
 841        return ret;
 842}
 843static DEVICE_ATTR_WO(unassign_adapter);
 844
 845static int
 846vfio_ap_mdev_verify_queues_reserved_for_apqi(struct ap_matrix_mdev *matrix_mdev,
 847                                             unsigned long apqi)
 848{
 849        int ret;
 850        unsigned long apid;
 851        unsigned long nbits = matrix_mdev->matrix.apm_max + 1;
 852
 853        if (find_first_bit_inv(matrix_mdev->matrix.apm, nbits) >= nbits)
 854                return vfio_ap_verify_queue_reserved(NULL, &apqi);
 855
 856        for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, nbits) {
 857                ret = vfio_ap_verify_queue_reserved(&apid, &apqi);
 858                if (ret)
 859                        return ret;
 860        }
 861
 862        return 0;
 863}
 864
 865/**
 866 * assign_domain_store - parses the APQI from @buf and sets the
 867 * corresponding bit in the mediated matrix device's AQM
 868 *
 869 * @dev:        the matrix device
 870 * @attr:       the mediated matrix device's assign_domain attribute
 871 * @buf:        a buffer containing the AP queue index (APQI) of the domain to
 872 *              be assigned
 873 * @count:      the number of bytes in @buf
 874 *
 875 * Return: the number of bytes processed if the APQI is valid; otherwise returns
 876 * one of the following errors:
 877 *
 878 *      1. -EINVAL
 879 *         The APQI is not a valid number
 880 *
 881 *      2. -ENODEV
 882 *         The APQI exceeds the maximum value configured for the system
 883 *
 884 *      3. -EADDRNOTAVAIL
 885 *         An APQN derived from the cross product of the APQI being assigned
 886 *         and the APIDs previously assigned is not bound to the vfio_ap device
 887 *         driver; or, if no APIDs have yet been assigned, the APQI is not
 888 *         contained in an APQN bound to the vfio_ap device driver.
 889 *
 890 *      4. -EADDRINUSE
 891 *         An APQN derived from the cross product of the APQI being assigned
 892 *         and the APIDs previously assigned is being used by another mediated
 893 *         matrix device
 894 */
 895static ssize_t assign_domain_store(struct device *dev,
 896                                   struct device_attribute *attr,
 897                                   const char *buf, size_t count)
 898{
 899        int ret;
 900        unsigned long apqi;
 901        struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
 902        unsigned long max_apqi = matrix_mdev->matrix.aqm_max;
 903
 904        mutex_lock(&matrix_dev->lock);
 905
 906        /* If the KVM guest is running, disallow assignment of domain */
 907        if (matrix_mdev->kvm) {
 908                ret = -EBUSY;
 909                goto done;
 910        }
 911
 912        ret = kstrtoul(buf, 0, &apqi);
 913        if (ret)
 914                goto done;
 915        if (apqi > max_apqi) {
 916                ret = -ENODEV;
 917                goto done;
 918        }
 919
 920        ret = vfio_ap_mdev_verify_queues_reserved_for_apqi(matrix_mdev, apqi);
 921        if (ret)
 922                goto done;
 923
 924        set_bit_inv(apqi, matrix_mdev->matrix.aqm);
 925
 926        ret = vfio_ap_mdev_verify_no_sharing(matrix_mdev);
 927        if (ret)
 928                goto share_err;
 929
 930        ret = count;
 931        goto done;
 932
 933share_err:
 934        clear_bit_inv(apqi, matrix_mdev->matrix.aqm);
 935done:
 936        mutex_unlock(&matrix_dev->lock);
 937
 938        return ret;
 939}
 940static DEVICE_ATTR_WO(assign_domain);
 941
 942
 943/**
 944 * unassign_domain_store - parses the APQI from @buf and clears the
 945 * corresponding bit in the mediated matrix device's AQM
 946 *
 947 * @dev:        the matrix device
 948 * @attr:       the mediated matrix device's unassign_domain attribute
 949 * @buf:        a buffer containing the AP queue index (APQI) of the domain to
 950 *              be unassigned
 951 * @count:      the number of bytes in @buf
 952 *
 953 * Return: the number of bytes processed if the APQI is valid; otherwise,
 954 * returns one of the following errors:
 955 *      -EINVAL if the APQI is not a number
 956 *      -ENODEV if the APQI exceeds the maximum value configured for the system
 957 */
 958static ssize_t unassign_domain_store(struct device *dev,
 959                                     struct device_attribute *attr,
 960                                     const char *buf, size_t count)
 961{
 962        int ret;
 963        unsigned long apqi;
 964        struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
 965
 966        mutex_lock(&matrix_dev->lock);
 967
 968        /* If the KVM guest is running, disallow unassignment of domain */
 969        if (matrix_mdev->kvm) {
 970                ret = -EBUSY;
 971                goto done;
 972        }
 973
 974        ret = kstrtoul(buf, 0, &apqi);
 975        if (ret)
 976                goto done;
 977
 978        if (apqi > matrix_mdev->matrix.aqm_max) {
 979                ret = -ENODEV;
 980                goto done;
 981        }
 982
 983        clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm);
 984        ret = count;
 985
 986done:
 987        mutex_unlock(&matrix_dev->lock);
 988        return ret;
 989}
 990static DEVICE_ATTR_WO(unassign_domain);
 991
 992/**
 993 * assign_control_domain_store - parses the domain ID from @buf and sets
 994 * the corresponding bit in the mediated matrix device's ADM
 995 *
 996 * @dev:        the matrix device
 997 * @attr:       the mediated matrix device's assign_control_domain attribute
 998 * @buf:        a buffer containing the domain ID to be assigned
 999 * @count:      the number of bytes in @buf
1000 *
1001 * Return: the number of bytes processed if the domain ID is valid; otherwise,
1002 * returns one of the following errors:
1003 *      -EINVAL if the ID is not a number
1004 *      -ENODEV if the ID exceeds the maximum value configured for the system
1005 */
1006static ssize_t assign_control_domain_store(struct device *dev,
1007                                           struct device_attribute *attr,
1008                                           const char *buf, size_t count)
1009{
1010        int ret;
1011        unsigned long id;
1012        struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1013
1014        mutex_lock(&matrix_dev->lock);
1015
1016        /* If the KVM guest is running, disallow assignment of control domain */
1017        if (matrix_mdev->kvm) {
1018                ret = -EBUSY;
1019                goto done;
1020        }
1021
1022        ret = kstrtoul(buf, 0, &id);
1023        if (ret)
1024                goto done;
1025
1026        if (id > matrix_mdev->matrix.adm_max) {
1027                ret = -ENODEV;
1028                goto done;
1029        }
1030
1031        /* Set the bit in the ADM (bitmask) corresponding to the AP control
1032         * domain number (id). The bits in the mask, from most significant to
1033         * least significant, correspond to IDs 0 up to the one less than the
1034         * number of control domains that can be assigned.
1035         */
1036        set_bit_inv(id, matrix_mdev->matrix.adm);
1037        ret = count;
1038done:
1039        mutex_unlock(&matrix_dev->lock);
1040        return ret;
1041}
1042static DEVICE_ATTR_WO(assign_control_domain);
1043
1044/**
1045 * unassign_control_domain_store - parses the domain ID from @buf and
1046 * clears the corresponding bit in the mediated matrix device's ADM
1047 *
1048 * @dev:        the matrix device
1049 * @attr:       the mediated matrix device's unassign_control_domain attribute
1050 * @buf:        a buffer containing the domain ID to be unassigned
1051 * @count:      the number of bytes in @buf
1052 *
1053 * Return: the number of bytes processed if the domain ID is valid; otherwise,
1054 * returns one of the following errors:
1055 *      -EINVAL if the ID is not a number
1056 *      -ENODEV if the ID exceeds the maximum value configured for the system
1057 */
1058static ssize_t unassign_control_domain_store(struct device *dev,
1059                                             struct device_attribute *attr,
1060                                             const char *buf, size_t count)
1061{
1062        int ret;
1063        unsigned long domid;
1064        struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1065        unsigned long max_domid =  matrix_mdev->matrix.adm_max;
1066
1067        mutex_lock(&matrix_dev->lock);
1068
1069        /* If a KVM guest is running, disallow unassignment of control domain */
1070        if (matrix_mdev->kvm) {
1071                ret = -EBUSY;
1072                goto done;
1073        }
1074
1075        ret = kstrtoul(buf, 0, &domid);
1076        if (ret)
1077                goto done;
1078        if (domid > max_domid) {
1079                ret = -ENODEV;
1080                goto done;
1081        }
1082
1083        clear_bit_inv(domid, matrix_mdev->matrix.adm);
1084        ret = count;
1085done:
1086        mutex_unlock(&matrix_dev->lock);
1087        return ret;
1088}
1089static DEVICE_ATTR_WO(unassign_control_domain);
1090
1091static ssize_t control_domains_show(struct device *dev,
1092                                    struct device_attribute *dev_attr,
1093                                    char *buf)
1094{
1095        unsigned long id;
1096        int nchars = 0;
1097        int n;
1098        char *bufpos = buf;
1099        struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1100        unsigned long max_domid = matrix_mdev->matrix.adm_max;
1101
1102        mutex_lock(&matrix_dev->lock);
1103        for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) {
1104                n = sprintf(bufpos, "%04lx\n", id);
1105                bufpos += n;
1106                nchars += n;
1107        }
1108        mutex_unlock(&matrix_dev->lock);
1109
1110        return nchars;
1111}
1112static DEVICE_ATTR_RO(control_domains);
1113
1114static ssize_t matrix_show(struct device *dev, struct device_attribute *attr,
1115                           char *buf)
1116{
1117        struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev);
1118        char *bufpos = buf;
1119        unsigned long apid;
1120        unsigned long apqi;
1121        unsigned long apid1;
1122        unsigned long apqi1;
1123        unsigned long napm_bits = matrix_mdev->matrix.apm_max + 1;
1124        unsigned long naqm_bits = matrix_mdev->matrix.aqm_max + 1;
1125        int nchars = 0;
1126        int n;
1127
1128        apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits);
1129        apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits);
1130
1131        mutex_lock(&matrix_dev->lock);
1132
1133        if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) {
1134                for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
1135                        for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
1136                                             naqm_bits) {
1137                                n = sprintf(bufpos, "%02lx.%04lx\n", apid,
1138                                            apqi);
1139                                bufpos += n;
1140                                nchars += n;
1141                        }
1142                }
1143        } else if (apid1 < napm_bits) {
1144                for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) {
1145                        n = sprintf(bufpos, "%02lx.\n", apid);
1146                        bufpos += n;
1147                        nchars += n;
1148                }
1149        } else if (apqi1 < naqm_bits) {
1150                for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm, naqm_bits) {
1151                        n = sprintf(bufpos, ".%04lx\n", apqi);
1152                        bufpos += n;
1153                        nchars += n;
1154                }
1155        }
1156
1157        mutex_unlock(&matrix_dev->lock);
1158
1159        return nchars;
1160}
1161static DEVICE_ATTR_RO(matrix);
1162
/* Sysfs attributes exposed for each mediated matrix device. */
static struct attribute *vfio_ap_mdev_attrs[] = {
	&dev_attr_assign_adapter.attr,
	&dev_attr_unassign_adapter.attr,
	&dev_attr_assign_domain.attr,
	&dev_attr_unassign_domain.attr,
	&dev_attr_assign_control_domain.attr,
	&dev_attr_unassign_control_domain.attr,
	&dev_attr_control_domains.attr,
	&dev_attr_matrix.attr,
	NULL,
};

static struct attribute_group vfio_ap_mdev_attr_group = {
	.attrs = vfio_ap_mdev_attrs
};

/* NULL-terminated list handed to the mdev driver's dev_groups below. */
static const struct attribute_group *vfio_ap_mdev_attr_groups[] = {
	&vfio_ap_mdev_attr_group,
	NULL
};
1183
1184/**
1185 * vfio_ap_mdev_set_kvm - sets all data for @matrix_mdev that are needed
1186 * to manage AP resources for the guest whose state is represented by @kvm
1187 *
1188 * @matrix_mdev: a mediated matrix device
1189 * @kvm: reference to KVM instance
1190 *
1191 * Return: 0 if no other mediated matrix device has a reference to @kvm;
1192 * otherwise, returns an -EPERM.
1193 */
1194static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
1195                                struct kvm *kvm)
1196{
1197        struct ap_matrix_mdev *m;
1198
1199        if (kvm->arch.crypto.crycbd) {
1200                down_write(&kvm->arch.crypto.pqap_hook_rwsem);
1201                kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
1202                up_write(&kvm->arch.crypto.pqap_hook_rwsem);
1203
1204                mutex_lock(&kvm->lock);
1205                mutex_lock(&matrix_dev->lock);
1206
1207                list_for_each_entry(m, &matrix_dev->mdev_list, node) {
1208                        if (m != matrix_mdev && m->kvm == kvm) {
1209                                mutex_unlock(&kvm->lock);
1210                                mutex_unlock(&matrix_dev->lock);
1211                                return -EPERM;
1212                        }
1213                }
1214
1215                kvm_get_kvm(kvm);
1216                matrix_mdev->kvm = kvm;
1217                kvm_arch_crypto_set_masks(kvm,
1218                                          matrix_mdev->matrix.apm,
1219                                          matrix_mdev->matrix.aqm,
1220                                          matrix_mdev->matrix.adm);
1221
1222                mutex_unlock(&kvm->lock);
1223                mutex_unlock(&matrix_dev->lock);
1224        }
1225
1226        return 0;
1227}
1228
1229/**
1230 * vfio_ap_mdev_iommu_notifier - IOMMU notifier callback
1231 *
1232 * @nb: The notifier block
1233 * @action: Action to be taken
1234 * @data: data associated with the request
1235 *
1236 * For an UNMAP request, unpin the guest IOVA (the NIB guest address we
1237 * pinned before). Other requests are ignored.
1238 *
 * Return: for an UNMAP request, NOTIFY_OK; otherwise NOTIFY_DONE.
1240 */
1241static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
1242                                       unsigned long action, void *data)
1243{
1244        struct ap_matrix_mdev *matrix_mdev;
1245
1246        matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
1247
1248        if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
1249                struct vfio_iommu_type1_dma_unmap *unmap = data;
1250                unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
1251
1252                vfio_unpin_pages(&matrix_mdev->vdev, &g_pfn, 1);
1253                return NOTIFY_OK;
1254        }
1255
1256        return NOTIFY_DONE;
1257}
1258
1259/**
1260 * vfio_ap_mdev_unset_kvm - performs clean-up of resources no longer needed
1261 * by @matrix_mdev.
1262 *
1263 * @matrix_mdev: a matrix mediated device
1264 */
static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
{
	struct kvm *kvm = matrix_mdev->kvm;

	/* Nothing to do unless a guest with a valid CRYCB is attached */
	if (kvm && kvm->arch.crypto.crycbd) {
		/*
		 * Clear the PQAP hook first so PQAP interceptions can no
		 * longer reach this mdev while it is being torn down.
		 */
		down_write(&kvm->arch.crypto.pqap_hook_rwsem);
		kvm->arch.crypto.pqap_hook = NULL;
		up_write(&kvm->arch.crypto.pqap_hook_rwsem);

		/* Same lock order as vfio_ap_mdev_set_kvm() */
		mutex_lock(&kvm->lock);
		mutex_lock(&matrix_dev->lock);

		/* Remove the AP resources from the guest's CRYCB */
		kvm_arch_crypto_clear_masks(kvm);
		vfio_ap_mdev_reset_queues(matrix_mdev);
		/* Drop the reference taken by vfio_ap_mdev_set_kvm() */
		kvm_put_kvm(kvm);
		matrix_mdev->kvm = NULL;

		mutex_unlock(&kvm->lock);
		mutex_unlock(&matrix_dev->lock);
	}
}
1286
1287static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
1288{
1289        struct device *dev;
1290        struct vfio_ap_queue *q = NULL;
1291
1292        dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
1293                                 &apqn, match_apqn);
1294        if (dev) {
1295                q = dev_get_drvdata(dev);
1296                put_device(dev);
1297        }
1298
1299        return q;
1300}
1301
/**
 * vfio_ap_mdev_reset_queue - reset an AP queue via PQAP(ZAPQ) and free its
 * interrupt resources
 *
 * @q: the queue to reset; may be NULL, in which case this is a no-op
 * @retry: number of times to retry the ZAPQ (20ms apart) while a reset is
 *	   already in progress
 *
 * Return: 0 if the reset completed; -EBUSY if the queue stayed busy or is
 * not available/deconfigured/checkstopped; -EIO on an unexpected ZAPQ
 * response code.
 */
int vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q,
			     unsigned int retry)
{
	struct ap_queue_status status;
	int ret;
	int retry2 = 2;	/* TAPQ polls waiting for the reset to take effect */

	if (!q)
		return 0;

retry_zapq:
	status = ap_zapq(q->apqn);
	switch (status.response_code) {
	case AP_RESPONSE_NORMAL:
		ret = 0;
		break;
	case AP_RESPONSE_RESET_IN_PROGRESS:
		if (retry--) {
			msleep(20);
			goto retry_zapq;
		}
		ret = -EBUSY;
		break;
	case AP_RESPONSE_Q_NOT_AVAIL:
	case AP_RESPONSE_DECONFIGURED:
	case AP_RESPONSE_CHECKSTOPPED:
		/* A queue in these states cannot have IRQs enabled */
		WARN_ON_ONCE(status.irq_enabled);
		ret = -EBUSY;
		goto free_resources;
	default:
		/* things are really broken, give up */
		/*
		 * NOTE(review): this path returns without calling
		 * vfio_ap_free_aqic_resources(q) — confirm that is intended.
		 */
		WARN(true, "PQAP/ZAPQ completed with invalid rc (%x)\n",
		     status.response_code);
		return -EIO;
	}

	/* wait for the reset to take effect */
	while (retry2--) {
		if (status.queue_empty && !status.irq_enabled)
			break;
		msleep(20);
		status = ap_tapq(q->apqn, NULL);
	}
	WARN_ON_ONCE(retry2 <= 0);

free_resources:
	vfio_ap_free_aqic_resources(q);

	return ret;
}
1352
1353static int vfio_ap_mdev_reset_queues(struct ap_matrix_mdev *matrix_mdev)
1354{
1355        int ret;
1356        int rc = 0;
1357        unsigned long apid, apqi;
1358        struct vfio_ap_queue *q;
1359
1360        for_each_set_bit_inv(apid, matrix_mdev->matrix.apm,
1361                             matrix_mdev->matrix.apm_max + 1) {
1362                for_each_set_bit_inv(apqi, matrix_mdev->matrix.aqm,
1363                                     matrix_mdev->matrix.aqm_max + 1) {
1364                        q = vfio_ap_find_queue(AP_MKQID(apid, apqi));
1365                        ret = vfio_ap_mdev_reset_queue(q, 1);
1366                        /*
1367                         * Regardless whether a queue turns out to be busy, or
1368                         * is not operational, we need to continue resetting
1369                         * the remaining queues.
1370                         */
1371                        if (ret)
1372                                rc = ret;
1373                }
1374        }
1375
1376        return rc;
1377}
1378
1379static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
1380{
1381        struct ap_matrix_mdev *matrix_mdev =
1382                container_of(vdev, struct ap_matrix_mdev, vdev);
1383        unsigned long events;
1384        int ret;
1385
1386        if (!vdev->kvm)
1387                return -EINVAL;
1388
1389        ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
1390        if (ret)
1391                return ret;
1392
1393        matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
1394        events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
1395        ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
1396                                     &matrix_mdev->iommu_notifier);
1397        if (ret)
1398                goto err_kvm;
1399        return 0;
1400
1401err_kvm:
1402        vfio_ap_mdev_unset_kvm(matrix_mdev);
1403        return ret;
1404}
1405
static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
{
	struct ap_matrix_mdev *matrix_mdev =
		container_of(vdev, struct ap_matrix_mdev, vdev);

	/* Tear down in reverse order of open: notifier first, then KVM */
	vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
				 &matrix_mdev->iommu_notifier);
	vfio_ap_mdev_unset_kvm(matrix_mdev);
}
1415
1416static int vfio_ap_mdev_get_device_info(unsigned long arg)
1417{
1418        unsigned long minsz;
1419        struct vfio_device_info info;
1420
1421        minsz = offsetofend(struct vfio_device_info, num_irqs);
1422
1423        if (copy_from_user(&info, (void __user *)arg, minsz))
1424                return -EFAULT;
1425
1426        if (info.argsz < minsz)
1427                return -EINVAL;
1428
1429        info.flags = VFIO_DEVICE_FLAGS_AP | VFIO_DEVICE_FLAGS_RESET;
1430        info.num_regions = 0;
1431        info.num_irqs = 0;
1432
1433        return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
1434}
1435
1436static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev,
1437                                    unsigned int cmd, unsigned long arg)
1438{
1439        struct ap_matrix_mdev *matrix_mdev =
1440                container_of(vdev, struct ap_matrix_mdev, vdev);
1441        int ret;
1442
1443        mutex_lock(&matrix_dev->lock);
1444        switch (cmd) {
1445        case VFIO_DEVICE_GET_INFO:
1446                ret = vfio_ap_mdev_get_device_info(arg);
1447                break;
1448        case VFIO_DEVICE_RESET:
1449                ret = vfio_ap_mdev_reset_queues(matrix_mdev);
1450                break;
1451        default:
1452                ret = -EOPNOTSUPP;
1453                break;
1454        }
1455        mutex_unlock(&matrix_dev->lock);
1456
1457        return ret;
1458}
1459
/* VFIO device callbacks for the mediated matrix device. */
static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
	.open_device = vfio_ap_mdev_open_device,
	.close_device = vfio_ap_mdev_close_device,
	.ioctl = vfio_ap_mdev_ioctl,
};

/* Mdev driver: probe/remove plus the sysfs attribute groups above. */
static struct mdev_driver vfio_ap_matrix_driver = {
	.driver = {
		.name = "vfio_ap_mdev",
		.owner = THIS_MODULE,
		.mod_name = KBUILD_MODNAME,
		.dev_groups = vfio_ap_mdev_attr_groups,
	},
	.probe = vfio_ap_mdev_probe,
	.remove = vfio_ap_mdev_remove,
	.supported_type_groups = vfio_ap_mdev_type_groups,
};
1477
1478int vfio_ap_mdev_register(void)
1479{
1480        int ret;
1481
1482        atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);
1483
1484        ret = mdev_register_driver(&vfio_ap_matrix_driver);
1485        if (ret)
1486                return ret;
1487
1488        ret = mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_driver);
1489        if (ret)
1490                goto err_driver;
1491        return 0;
1492
1493err_driver:
1494        mdev_unregister_driver(&vfio_ap_matrix_driver);
1495        return ret;
1496}
1497
/* Unregister in the reverse order of vfio_ap_mdev_register(). */
void vfio_ap_mdev_unregister(void)
{
	mdev_unregister_device(&matrix_dev->device);
	mdev_unregister_driver(&vfio_ap_matrix_driver);
}
1503