linux/drivers/virt/nitro_enclaves/ne_pci_dev.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
   4 */
   5
   6/**
   7 * DOC: Nitro Enclaves (NE) PCI device driver.
   8 */
   9
  10#include <linux/delay.h>
  11#include <linux/device.h>
  12#include <linux/list.h>
  13#include <linux/module.h>
  14#include <linux/mutex.h>
  15#include <linux/nitro_enclaves.h>
  16#include <linux/pci.h>
  17#include <linux/types.h>
  18#include <linux/wait.h>
  19
  20#include "ne_misc_dev.h"
  21#include "ne_pci_dev.h"
  22
  23/**
  24 * NE_DEFAULT_TIMEOUT_MSECS - Default timeout to wait for a reply from
  25 *                            the NE PCI device.
  26 */
  27#define NE_DEFAULT_TIMEOUT_MSECS        (120000) /* 120 sec */
  28
  29static const struct pci_device_id ne_pci_ids[] = {
  30        { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_NE) },
  31        { 0, }
  32};
  33
  34MODULE_DEVICE_TABLE(pci, ne_pci_ids);
  35
  36/**
  37 * ne_submit_request() - Submit command request to the PCI device based on the
  38 *                       command type.
  39 * @pdev:               PCI device to send the command to.
  40 * @cmd_type:           Command type of the request sent to the PCI device.
  41 * @cmd_request:        Command request payload.
  42 * @cmd_request_size:   Size of the command request payload.
  43 *
  44 * Context: Process context. This function is called with the ne_pci_dev mutex held.
  45 */
  46static void ne_submit_request(struct pci_dev *pdev, enum ne_pci_dev_cmd_type cmd_type,
  47                              void *cmd_request, size_t cmd_request_size)
  48{
  49        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
  50
  51        memcpy_toio(ne_pci_dev->iomem_base + NE_SEND_DATA, cmd_request, cmd_request_size);
  52
  53        iowrite32(cmd_type, ne_pci_dev->iomem_base + NE_COMMAND);
  54}
  55
  56/**
  57 * ne_retrieve_reply() - Retrieve reply from the PCI device.
  58 * @pdev:               PCI device to receive the reply from.
  59 * @cmd_reply:          Command reply payload.
  60 * @cmd_reply_size:     Size of the command reply payload.
  61 *
  62 * Context: Process context. This function is called with the ne_pci_dev mutex held.
  63 */
  64static void ne_retrieve_reply(struct pci_dev *pdev, struct ne_pci_dev_cmd_reply *cmd_reply,
  65                              size_t cmd_reply_size)
  66{
  67        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
  68
  69        memcpy_fromio(cmd_reply, ne_pci_dev->iomem_base + NE_RECV_DATA, cmd_reply_size);
  70}
  71
  72/**
  73 * ne_wait_for_reply() - Wait for a reply of a PCI device command.
  74 * @pdev:       PCI device for which a reply is waited.
  75 *
  76 * Context: Process context. This function is called with the ne_pci_dev mutex held.
  77 * Return:
  78 * * 0 on success.
  79 * * Negative return value on failure.
  80 */
  81static int ne_wait_for_reply(struct pci_dev *pdev)
  82{
  83        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
  84        int rc = -EINVAL;
  85
  86        /*
  87         * TODO: Update to _interruptible and handle interrupted wait event
  88         * e.g. -ERESTARTSYS, incoming signals + update timeout, if needed.
  89         */
  90        rc = wait_event_timeout(ne_pci_dev->cmd_reply_wait_q,
  91                                atomic_read(&ne_pci_dev->cmd_reply_avail) != 0,
  92                                msecs_to_jiffies(NE_DEFAULT_TIMEOUT_MSECS));
  93        if (!rc)
  94                return -ETIMEDOUT;
  95
  96        return 0;
  97}
  98
  99int ne_do_request(struct pci_dev *pdev, enum ne_pci_dev_cmd_type cmd_type,
 100                  void *cmd_request, size_t cmd_request_size,
 101                  struct ne_pci_dev_cmd_reply *cmd_reply, size_t cmd_reply_size)
 102{
 103        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 104        int rc = -EINVAL;
 105
 106        if (cmd_type <= INVALID_CMD || cmd_type >= MAX_CMD) {
 107                dev_err_ratelimited(&pdev->dev, "Invalid cmd type=%u\n", cmd_type);
 108
 109                return -EINVAL;
 110        }
 111
 112        if (!cmd_request) {
 113                dev_err_ratelimited(&pdev->dev, "Null cmd request for cmd type=%u\n",
 114                                    cmd_type);
 115
 116                return -EINVAL;
 117        }
 118
 119        if (cmd_request_size > NE_SEND_DATA_SIZE) {
 120                dev_err_ratelimited(&pdev->dev, "Invalid req size=%zu for cmd type=%u\n",
 121                                    cmd_request_size, cmd_type);
 122
 123                return -EINVAL;
 124        }
 125
 126        if (!cmd_reply) {
 127                dev_err_ratelimited(&pdev->dev, "Null cmd reply for cmd type=%u\n",
 128                                    cmd_type);
 129
 130                return -EINVAL;
 131        }
 132
 133        if (cmd_reply_size > NE_RECV_DATA_SIZE) {
 134                dev_err_ratelimited(&pdev->dev, "Invalid reply size=%zu for cmd type=%u\n",
 135                                    cmd_reply_size, cmd_type);
 136
 137                return -EINVAL;
 138        }
 139
 140        /*
 141         * Use this mutex so that the PCI device handles one command request at
 142         * a time.
 143         */
 144        mutex_lock(&ne_pci_dev->pci_dev_mutex);
 145
 146        atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
 147
 148        ne_submit_request(pdev, cmd_type, cmd_request, cmd_request_size);
 149
 150        rc = ne_wait_for_reply(pdev);
 151        if (rc < 0) {
 152                dev_err_ratelimited(&pdev->dev, "Error in wait for reply for cmd type=%u [rc=%d]\n",
 153                                    cmd_type, rc);
 154
 155                goto unlock_mutex;
 156        }
 157
 158        ne_retrieve_reply(pdev, cmd_reply, cmd_reply_size);
 159
 160        atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
 161
 162        if (cmd_reply->rc < 0) {
 163                rc = cmd_reply->rc;
 164
 165                dev_err_ratelimited(&pdev->dev, "Error in cmd process logic, cmd type=%u [rc=%d]\n",
 166                                    cmd_type, rc);
 167
 168                goto unlock_mutex;
 169        }
 170
 171        rc = 0;
 172
 173unlock_mutex:
 174        mutex_unlock(&ne_pci_dev->pci_dev_mutex);
 175
 176        return rc;
 177}
 178
 179/**
 180 * ne_reply_handler() - Interrupt handler for retrieving a reply matching a
 181 *                      request sent to the PCI device for enclave lifetime
 182 *                      management.
 183 * @irq:        Received interrupt for a reply sent by the PCI device.
 184 * @args:       PCI device private data structure.
 185 *
 186 * Context: Interrupt context.
 187 * Return:
 188 * * IRQ_HANDLED on handled interrupt.
 189 */
 190static irqreturn_t ne_reply_handler(int irq, void *args)
 191{
 192        struct ne_pci_dev *ne_pci_dev = (struct ne_pci_dev *)args;
 193
 194        atomic_set(&ne_pci_dev->cmd_reply_avail, 1);
 195
 196        /* TODO: Update to _interruptible. */
 197        wake_up(&ne_pci_dev->cmd_reply_wait_q);
 198
 199        return IRQ_HANDLED;
 200}
 201
 202/**
 203 * ne_event_work_handler() - Work queue handler for notifying enclaves on a
 204 *                           state change received by the event interrupt
 205 *                           handler.
 206 * @work:       Item containing the NE PCI device for which an out-of-band event
 207 *              was issued.
 208 *
 209 * An out-of-band event is being issued by the Nitro Hypervisor when at least
 210 * one enclave is changing state without client interaction.
 211 *
 212 * Context: Work queue context.
 213 */
 214static void ne_event_work_handler(struct work_struct *work)
 215{
 216        struct ne_pci_dev_cmd_reply cmd_reply = {};
 217        struct ne_enclave *ne_enclave = NULL;
 218        struct ne_pci_dev *ne_pci_dev =
 219                container_of(work, struct ne_pci_dev, notify_work);
 220        struct pci_dev *pdev = ne_pci_dev->pdev;
 221        int rc = -EINVAL;
 222        struct slot_info_req slot_info_req = {};
 223
 224        mutex_lock(&ne_pci_dev->enclaves_list_mutex);
 225
 226        /*
 227         * Iterate over all enclaves registered for the Nitro Enclaves
 228         * PCI device and determine for which enclave(s) the out-of-band event
 229         * is corresponding to.
 230         */
 231        list_for_each_entry(ne_enclave, &ne_pci_dev->enclaves_list, enclave_list_entry) {
 232                mutex_lock(&ne_enclave->enclave_info_mutex);
 233
 234                /*
 235                 * Enclaves that were never started cannot receive out-of-band
 236                 * events.
 237                 */
 238                if (ne_enclave->state != NE_STATE_RUNNING)
 239                        goto unlock;
 240
 241                slot_info_req.slot_uid = ne_enclave->slot_uid;
 242
 243                rc = ne_do_request(pdev, SLOT_INFO,
 244                                   &slot_info_req, sizeof(slot_info_req),
 245                                   &cmd_reply, sizeof(cmd_reply));
 246                if (rc < 0)
 247                        dev_err(&pdev->dev, "Error in slot info [rc=%d]\n", rc);
 248
 249                /* Notify enclave process that the enclave state changed. */
 250                if (ne_enclave->state != cmd_reply.state) {
 251                        ne_enclave->state = cmd_reply.state;
 252
 253                        ne_enclave->has_event = true;
 254
 255                        wake_up_interruptible(&ne_enclave->eventq);
 256                }
 257
 258unlock:
 259                 mutex_unlock(&ne_enclave->enclave_info_mutex);
 260        }
 261
 262        mutex_unlock(&ne_pci_dev->enclaves_list_mutex);
 263}
 264
 265/**
 266 * ne_event_handler() - Interrupt handler for PCI device out-of-band events.
 267 *                      This interrupt does not supply any data in the MMIO
 268 *                      region. It notifies a change in the state of any of
 269 *                      the launched enclaves.
 270 * @irq:        Received interrupt for an out-of-band event.
 271 * @args:       PCI device private data structure.
 272 *
 273 * Context: Interrupt context.
 274 * Return:
 275 * * IRQ_HANDLED on handled interrupt.
 276 */
 277static irqreturn_t ne_event_handler(int irq, void *args)
 278{
 279        struct ne_pci_dev *ne_pci_dev = (struct ne_pci_dev *)args;
 280
 281        queue_work(ne_pci_dev->event_wq, &ne_pci_dev->notify_work);
 282
 283        return IRQ_HANDLED;
 284}
 285
 286/**
 287 * ne_setup_msix() - Setup MSI-X vectors for the PCI device.
 288 * @pdev:       PCI device to setup the MSI-X for.
 289 *
 290 * Context: Process context.
 291 * Return:
 292 * * 0 on success.
 293 * * Negative return value on failure.
 294 */
 295static int ne_setup_msix(struct pci_dev *pdev)
 296{
 297        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 298        int nr_vecs = 0;
 299        int rc = -EINVAL;
 300
 301        nr_vecs = pci_msix_vec_count(pdev);
 302        if (nr_vecs < 0) {
 303                rc = nr_vecs;
 304
 305                dev_err(&pdev->dev, "Error in getting vec count [rc=%d]\n", rc);
 306
 307                return rc;
 308        }
 309
 310        rc = pci_alloc_irq_vectors(pdev, nr_vecs, nr_vecs, PCI_IRQ_MSIX);
 311        if (rc < 0) {
 312                dev_err(&pdev->dev, "Error in alloc MSI-X vecs [rc=%d]\n", rc);
 313
 314                return rc;
 315        }
 316
 317        /*
 318         * This IRQ gets triggered every time the PCI device responds to a
 319         * command request. The reply is then retrieved, reading from the MMIO
 320         * space of the PCI device.
 321         */
 322        rc = request_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_reply_handler,
 323                         0, "enclave_cmd", ne_pci_dev);
 324        if (rc < 0) {
 325                dev_err(&pdev->dev, "Error in request irq reply [rc=%d]\n", rc);
 326
 327                goto free_irq_vectors;
 328        }
 329
 330        ne_pci_dev->event_wq = create_singlethread_workqueue("ne_pci_dev_wq");
 331        if (!ne_pci_dev->event_wq) {
 332                rc = -ENOMEM;
 333
 334                dev_err(&pdev->dev, "Cannot get wq for dev events [rc=%d]\n", rc);
 335
 336                goto free_reply_irq_vec;
 337        }
 338
 339        INIT_WORK(&ne_pci_dev->notify_work, ne_event_work_handler);
 340
 341        /*
 342         * This IRQ gets triggered every time any enclave's state changes. Its
 343         * handler then scans for the changes and propagates them to the user
 344         * space.
 345         */
 346        rc = request_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_event_handler,
 347                         0, "enclave_evt", ne_pci_dev);
 348        if (rc < 0) {
 349                dev_err(&pdev->dev, "Error in request irq event [rc=%d]\n", rc);
 350
 351                goto destroy_wq;
 352        }
 353
 354        return 0;
 355
 356destroy_wq:
 357        destroy_workqueue(ne_pci_dev->event_wq);
 358free_reply_irq_vec:
 359        free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
 360free_irq_vectors:
 361        pci_free_irq_vectors(pdev);
 362
 363        return rc;
 364}
 365
 366/**
 367 * ne_teardown_msix() - Teardown MSI-X vectors for the PCI device.
 368 * @pdev:       PCI device to teardown the MSI-X for.
 369 *
 370 * Context: Process context.
 371 */
 372static void ne_teardown_msix(struct pci_dev *pdev)
 373{
 374        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 375
 376        free_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_pci_dev);
 377
 378        flush_work(&ne_pci_dev->notify_work);
 379        flush_workqueue(ne_pci_dev->event_wq);
 380        destroy_workqueue(ne_pci_dev->event_wq);
 381
 382        free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
 383
 384        pci_free_irq_vectors(pdev);
 385}
 386
 387/**
 388 * ne_pci_dev_enable() - Select the PCI device version and enable it.
 389 * @pdev:       PCI device to select version for and then enable.
 390 *
 391 * Context: Process context.
 392 * Return:
 393 * * 0 on success.
 394 * * Negative return value on failure.
 395 */
 396static int ne_pci_dev_enable(struct pci_dev *pdev)
 397{
 398        u8 dev_enable_reply = 0;
 399        u16 dev_version_reply = 0;
 400        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 401
 402        iowrite16(NE_VERSION_MAX, ne_pci_dev->iomem_base + NE_VERSION);
 403
 404        dev_version_reply = ioread16(ne_pci_dev->iomem_base + NE_VERSION);
 405        if (dev_version_reply != NE_VERSION_MAX) {
 406                dev_err(&pdev->dev, "Error in pci dev version cmd\n");
 407
 408                return -EIO;
 409        }
 410
 411        iowrite8(NE_ENABLE_ON, ne_pci_dev->iomem_base + NE_ENABLE);
 412
 413        dev_enable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
 414        if (dev_enable_reply != NE_ENABLE_ON) {
 415                dev_err(&pdev->dev, "Error in pci dev enable cmd\n");
 416
 417                return -EIO;
 418        }
 419
 420        return 0;
 421}
 422
 423/**
 424 * ne_pci_dev_disable() - Disable the PCI device.
 425 * @pdev:       PCI device to disable.
 426 *
 427 * Context: Process context.
 428 */
 429static void ne_pci_dev_disable(struct pci_dev *pdev)
 430{
 431        u8 dev_disable_reply = 0;
 432        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 433        const unsigned int sleep_time = 10; /* 10 ms */
 434        unsigned int sleep_time_count = 0;
 435
 436        iowrite8(NE_ENABLE_OFF, ne_pci_dev->iomem_base + NE_ENABLE);
 437
 438        /*
 439         * Check for NE_ENABLE_OFF in a loop, to handle cases when the device
 440         * state is not immediately set to disabled and going through a
 441         * transitory state of disabling.
 442         */
 443        while (sleep_time_count < NE_DEFAULT_TIMEOUT_MSECS) {
 444                dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
 445                if (dev_disable_reply == NE_ENABLE_OFF)
 446                        return;
 447
 448                msleep_interruptible(sleep_time);
 449                sleep_time_count += sleep_time;
 450        }
 451
 452        dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
 453        if (dev_disable_reply != NE_ENABLE_OFF)
 454                dev_err(&pdev->dev, "Error in pci dev disable cmd\n");
 455}
 456
 457/**
 458 * ne_pci_probe() - Probe function for the NE PCI device.
 459 * @pdev:       PCI device to match with the NE PCI driver.
 460 * @id :        PCI device id table associated with the NE PCI driver.
 461 *
 462 * Context: Process context.
 463 * Return:
 464 * * 0 on success.
 465 * * Negative return value on failure.
 466 */
 467static int ne_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 468{
 469        struct ne_pci_dev *ne_pci_dev = NULL;
 470        int rc = -EINVAL;
 471
 472        ne_pci_dev = kzalloc(sizeof(*ne_pci_dev), GFP_KERNEL);
 473        if (!ne_pci_dev)
 474                return -ENOMEM;
 475
 476        rc = pci_enable_device(pdev);
 477        if (rc < 0) {
 478                dev_err(&pdev->dev, "Error in pci dev enable [rc=%d]\n", rc);
 479
 480                goto free_ne_pci_dev;
 481        }
 482
 483        rc = pci_request_regions_exclusive(pdev, "nitro_enclaves");
 484        if (rc < 0) {
 485                dev_err(&pdev->dev, "Error in pci request regions [rc=%d]\n", rc);
 486
 487                goto disable_pci_dev;
 488        }
 489
 490        ne_pci_dev->iomem_base = pci_iomap(pdev, PCI_BAR_NE, 0);
 491        if (!ne_pci_dev->iomem_base) {
 492                rc = -ENOMEM;
 493
 494                dev_err(&pdev->dev, "Error in pci iomap [rc=%d]\n", rc);
 495
 496                goto release_pci_regions;
 497        }
 498
 499        pci_set_drvdata(pdev, ne_pci_dev);
 500
 501        rc = ne_setup_msix(pdev);
 502        if (rc < 0) {
 503                dev_err(&pdev->dev, "Error in pci dev msix setup [rc=%d]\n", rc);
 504
 505                goto iounmap_pci_bar;
 506        }
 507
 508        ne_pci_dev_disable(pdev);
 509
 510        rc = ne_pci_dev_enable(pdev);
 511        if (rc < 0) {
 512                dev_err(&pdev->dev, "Error in ne_pci_dev enable [rc=%d]\n", rc);
 513
 514                goto teardown_msix;
 515        }
 516
 517        atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
 518        init_waitqueue_head(&ne_pci_dev->cmd_reply_wait_q);
 519        INIT_LIST_HEAD(&ne_pci_dev->enclaves_list);
 520        mutex_init(&ne_pci_dev->enclaves_list_mutex);
 521        mutex_init(&ne_pci_dev->pci_dev_mutex);
 522        ne_pci_dev->pdev = pdev;
 523
 524        ne_devs.ne_pci_dev = ne_pci_dev;
 525
 526        rc = misc_register(ne_devs.ne_misc_dev);
 527        if (rc < 0) {
 528                dev_err(&pdev->dev, "Error in misc dev register [rc=%d]\n", rc);
 529
 530                goto disable_ne_pci_dev;
 531        }
 532
 533        return 0;
 534
 535disable_ne_pci_dev:
 536        ne_devs.ne_pci_dev = NULL;
 537        ne_pci_dev_disable(pdev);
 538teardown_msix:
 539        ne_teardown_msix(pdev);
 540iounmap_pci_bar:
 541        pci_set_drvdata(pdev, NULL);
 542        pci_iounmap(pdev, ne_pci_dev->iomem_base);
 543release_pci_regions:
 544        pci_release_regions(pdev);
 545disable_pci_dev:
 546        pci_disable_device(pdev);
 547free_ne_pci_dev:
 548        kfree(ne_pci_dev);
 549
 550        return rc;
 551}
 552
 553/**
 554 * ne_pci_remove() - Remove function for the NE PCI device.
 555 * @pdev:       PCI device associated with the NE PCI driver.
 556 *
 557 * Context: Process context.
 558 */
 559static void ne_pci_remove(struct pci_dev *pdev)
 560{
 561        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 562
 563        misc_deregister(ne_devs.ne_misc_dev);
 564
 565        ne_devs.ne_pci_dev = NULL;
 566
 567        ne_pci_dev_disable(pdev);
 568
 569        ne_teardown_msix(pdev);
 570
 571        pci_set_drvdata(pdev, NULL);
 572
 573        pci_iounmap(pdev, ne_pci_dev->iomem_base);
 574
 575        pci_release_regions(pdev);
 576
 577        pci_disable_device(pdev);
 578
 579        kfree(ne_pci_dev);
 580}
 581
 582/**
 583 * ne_pci_shutdown() - Shutdown function for the NE PCI device.
 584 * @pdev:       PCI device associated with the NE PCI driver.
 585 *
 586 * Context: Process context.
 587 */
 588static void ne_pci_shutdown(struct pci_dev *pdev)
 589{
 590        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 591
 592        if (!ne_pci_dev)
 593                return;
 594
 595        misc_deregister(ne_devs.ne_misc_dev);
 596
 597        ne_devs.ne_pci_dev = NULL;
 598
 599        ne_pci_dev_disable(pdev);
 600
 601        ne_teardown_msix(pdev);
 602
 603        pci_set_drvdata(pdev, NULL);
 604
 605        pci_iounmap(pdev, ne_pci_dev->iomem_base);
 606
 607        pci_release_regions(pdev);
 608
 609        pci_disable_device(pdev);
 610
 611        kfree(ne_pci_dev);
 612}
 613
 614/*
 615 * TODO: Add suspend / resume functions for power management w/ CONFIG_PM, if
 616 * needed.
 617 */
 618/* NE PCI device driver. */
 619struct pci_driver ne_pci_driver = {
 620        .name           = "nitro_enclaves",
 621        .id_table       = ne_pci_ids,
 622        .probe          = ne_pci_probe,
 623        .remove         = ne_pci_remove,
 624        .shutdown       = ne_pci_shutdown,
 625};
 626