   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
   4 */
   5
   6/**
   7 * DOC: Nitro Enclaves (NE) PCI device driver.
   8 */
   9
  10#include <linux/delay.h>
  11#include <linux/device.h>
  12#include <linux/list.h>
  13#include <linux/module.h>
  14#include <linux/mutex.h>
  15#include <linux/nitro_enclaves.h>
  16#include <linux/pci.h>
  17#include <linux/types.h>
  18#include <linux/wait.h>
  19
  20#include "ne_misc_dev.h"
  21#include "ne_pci_dev.h"
  22
/**
 * NE_DEFAULT_TIMEOUT_MSECS - Default timeout to wait for a reply from
 *                            the NE PCI device.
 */
#define NE_DEFAULT_TIMEOUT_MSECS        (120000) /* 120 sec */

/* PCI device IDs this driver binds to; terminated by a zeroed sentinel entry. */
static const struct pci_device_id ne_pci_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_NE) },
        { 0, }
};

/* Export the ID table so module autoloading can match the device. */
MODULE_DEVICE_TABLE(pci, ne_pci_ids);
  35
  36/**
  37 * ne_submit_request() - Submit command request to the PCI device based on the
  38 *                       command type.
  39 * @pdev:               PCI device to send the command to.
  40 * @cmd_type:           Command type of the request sent to the PCI device.
  41 * @cmd_request:        Command request payload.
  42 * @cmd_request_size:   Size of the command request payload.
  43 *
  44 * Context: Process context. This function is called with the ne_pci_dev mutex held.
  45 */
  46static void ne_submit_request(struct pci_dev *pdev, enum ne_pci_dev_cmd_type cmd_type,
  47                              void *cmd_request, size_t cmd_request_size)
  48{
  49        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
  50
  51        memcpy_toio(ne_pci_dev->iomem_base + NE_SEND_DATA, cmd_request, cmd_request_size);
  52
  53        iowrite32(cmd_type, ne_pci_dev->iomem_base + NE_COMMAND);
  54}
  55
  56/**
  57 * ne_retrieve_reply() - Retrieve reply from the PCI device.
  58 * @pdev:               PCI device to receive the reply from.
  59 * @cmd_reply:          Command reply payload.
  60 * @cmd_reply_size:     Size of the command reply payload.
  61 *
  62 * Context: Process context. This function is called with the ne_pci_dev mutex held.
  63 */
  64static void ne_retrieve_reply(struct pci_dev *pdev, struct ne_pci_dev_cmd_reply *cmd_reply,
  65                              size_t cmd_reply_size)
  66{
  67        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
  68
  69        memcpy_fromio(cmd_reply, ne_pci_dev->iomem_base + NE_RECV_DATA, cmd_reply_size);
  70}
  71
  72/**
  73 * ne_wait_for_reply() - Wait for a reply of a PCI device command.
  74 * @pdev:       PCI device for which a reply is waited.
  75 *
  76 * Context: Process context. This function is called with the ne_pci_dev mutex held.
  77 * Return:
  78 * * 0 on success.
  79 * * Negative return value on failure.
  80 */
  81static int ne_wait_for_reply(struct pci_dev *pdev)
  82{
  83        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
  84        int rc = -EINVAL;
  85
  86        /*
  87         * TODO: Update to _interruptible and handle interrupted wait event
  88         * e.g. -ERESTARTSYS, incoming signals + update timeout, if needed.
  89         */
  90        rc = wait_event_timeout(ne_pci_dev->cmd_reply_wait_q,
  91                                atomic_read(&ne_pci_dev->cmd_reply_avail) != 0,
  92                                msecs_to_jiffies(NE_DEFAULT_TIMEOUT_MSECS));
  93        if (!rc)
  94                return -ETIMEDOUT;
  95
  96        return 0;
  97}
  98
  99int ne_do_request(struct pci_dev *pdev, enum ne_pci_dev_cmd_type cmd_type,
 100                  void *cmd_request, size_t cmd_request_size,
 101                  struct ne_pci_dev_cmd_reply *cmd_reply, size_t cmd_reply_size)
 102{
 103        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 104        int rc = -EINVAL;
 105
 106        if (cmd_type <= INVALID_CMD || cmd_type >= MAX_CMD) {
 107                dev_err_ratelimited(&pdev->dev, "Invalid cmd type=%u\n", cmd_type);
 108
 109                return -EINVAL;
 110        }
 111
 112        if (!cmd_request) {
 113                dev_err_ratelimited(&pdev->dev, "Null cmd request for cmd type=%u\n",
 114                                    cmd_type);
 115
 116                return -EINVAL;
 117        }
 118
 119        if (cmd_request_size > NE_SEND_DATA_SIZE) {
 120                dev_err_ratelimited(&pdev->dev, "Invalid req size=%zu for cmd type=%u\n",
 121                                    cmd_request_size, cmd_type);
 122
 123                return -EINVAL;
 124        }
 125
 126        if (!cmd_reply) {
 127                dev_err_ratelimited(&pdev->dev, "Null cmd reply for cmd type=%u\n",
 128                                    cmd_type);
 129
 130                return -EINVAL;
 131        }
 132
 133        if (cmd_reply_size > NE_RECV_DATA_SIZE) {
 134                dev_err_ratelimited(&pdev->dev, "Invalid reply size=%zu for cmd type=%u\n",
 135                                    cmd_reply_size, cmd_type);
 136
 137                return -EINVAL;
 138        }
 139
 140        /*
 141         * Use this mutex so that the PCI device handles one command request at
 142         * a time.
 143         */
 144        mutex_lock(&ne_pci_dev->pci_dev_mutex);
 145
 146        atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
 147
 148        ne_submit_request(pdev, cmd_type, cmd_request, cmd_request_size);
 149
 150        rc = ne_wait_for_reply(pdev);
 151        if (rc < 0) {
 152                dev_err_ratelimited(&pdev->dev, "Error in wait for reply for cmd type=%u [rc=%d]\n",
 153                                    cmd_type, rc);
 154
 155                goto unlock_mutex;
 156        }
 157
 158        ne_retrieve_reply(pdev, cmd_reply, cmd_reply_size);
 159
 160        atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
 161
 162        if (cmd_reply->rc < 0) {
 163                rc = cmd_reply->rc;
 164
 165                dev_err_ratelimited(&pdev->dev, "Error in cmd process logic, cmd type=%u [rc=%d]\n",
 166                                    cmd_type, rc);
 167
 168                goto unlock_mutex;
 169        }
 170
 171        rc = 0;
 172
 173unlock_mutex:
 174        mutex_unlock(&ne_pci_dev->pci_dev_mutex);
 175
 176        return rc;
 177}
 178
 179/**
 180 * ne_reply_handler() - Interrupt handler for retrieving a reply matching a
 181 *                      request sent to the PCI device for enclave lifetime
 182 *                      management.
 183 * @irq:        Received interrupt for a reply sent by the PCI device.
 184 * @args:       PCI device private data structure.
 185 *
 186 * Context: Interrupt context.
 187 * Return:
 188 * * IRQ_HANDLED on handled interrupt.
 189 */
 190static irqreturn_t ne_reply_handler(int irq, void *args)
 191{
 192        struct ne_pci_dev *ne_pci_dev = (struct ne_pci_dev *)args;
 193
 194        atomic_set(&ne_pci_dev->cmd_reply_avail, 1);
 195
 196        /* TODO: Update to _interruptible. */
 197        wake_up(&ne_pci_dev->cmd_reply_wait_q);
 198
 199        return IRQ_HANDLED;
 200}
 201
 202/**
 203 * ne_event_work_handler() - Work queue handler for notifying enclaves on a
 204 *                           state change received by the event interrupt
 205 *                           handler.
 206 * @work:       Item containing the NE PCI device for which an out-of-band event
 207 *              was issued.
 208 *
 209 * An out-of-band event is being issued by the Nitro Hypervisor when at least
 210 * one enclave is changing state without client interaction.
 211 *
 212 * Context: Work queue context.
 213 */
 214static void ne_event_work_handler(struct work_struct *work)
 215{
 216        struct ne_pci_dev_cmd_reply cmd_reply = {};
 217        struct ne_enclave *ne_enclave = NULL;
 218        struct ne_pci_dev *ne_pci_dev =
 219                container_of(work, struct ne_pci_dev, notify_work);
 220        struct pci_dev *pdev = ne_pci_dev->pdev;
 221        int rc = -EINVAL;
 222        struct slot_info_req slot_info_req = {};
 223
 224        mutex_lock(&ne_pci_dev->enclaves_list_mutex);
 225
 226        /*
 227         * Iterate over all enclaves registered for the Nitro Enclaves
 228         * PCI device and determine for which enclave(s) the out-of-band event
 229         * is corresponding to.
 230         */
 231        list_for_each_entry(ne_enclave, &ne_pci_dev->enclaves_list, enclave_list_entry) {
 232                mutex_lock(&ne_enclave->enclave_info_mutex);
 233
 234                /*
 235                 * Enclaves that were never started cannot receive out-of-band
 236                 * events.
 237                 */
 238                if (ne_enclave->state != NE_STATE_RUNNING)
 239                        goto unlock;
 240
 241                slot_info_req.slot_uid = ne_enclave->slot_uid;
 242
 243                rc = ne_do_request(pdev, SLOT_INFO,
 244                                   &slot_info_req, sizeof(slot_info_req),
 245                                   &cmd_reply, sizeof(cmd_reply));
 246                if (rc < 0)
 247                        dev_err(&pdev->dev, "Error in slot info [rc=%d]\n", rc);
 248
 249                /* Notify enclave process that the enclave state changed. */
 250                if (ne_enclave->state != cmd_reply.state) {
 251                        ne_enclave->state = cmd_reply.state;
 252
 253                        ne_enclave->has_event = true;
 254
 255                        wake_up_interruptible(&ne_enclave->eventq);
 256                }
 257
 258unlock:
 259                 mutex_unlock(&ne_enclave->enclave_info_mutex);
 260        }
 261
 262        mutex_unlock(&ne_pci_dev->enclaves_list_mutex);
 263}
 264
 265/**
 266 * ne_event_handler() - Interrupt handler for PCI device out-of-band events.
 267 *                      This interrupt does not supply any data in the MMIO
 268 *                      region. It notifies a change in the state of any of
 269 *                      the launched enclaves.
 270 * @irq:        Received interrupt for an out-of-band event.
 271 * @args:       PCI device private data structure.
 272 *
 273 * Context: Interrupt context.
 274 * Return:
 275 * * IRQ_HANDLED on handled interrupt.
 276 */
 277static irqreturn_t ne_event_handler(int irq, void *args)
 278{
 279        struct ne_pci_dev *ne_pci_dev = (struct ne_pci_dev *)args;
 280
 281        queue_work(ne_pci_dev->event_wq, &ne_pci_dev->notify_work);
 282
 283        return IRQ_HANDLED;
 284}
 285
/**
 * ne_setup_msix() - Setup MSI-X vectors for the PCI device.
 * @pdev:       PCI device to setup the MSI-X for.
 *
 * Allocates all MSI-X vectors the device advertises and wires up the two IRQs
 * this driver uses: NE_VEC_REPLY for command replies and NE_VEC_EVENT for
 * out-of-band enclave state-change events. Also creates the single-threaded
 * workqueue the event IRQ handler queues its work on.
 *
 * Context: Process context.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_setup_msix(struct pci_dev *pdev)
{
        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
        int nr_vecs = 0;
        int rc = -EINVAL;

        nr_vecs = pci_msix_vec_count(pdev);
        if (nr_vecs < 0) {
                rc = nr_vecs;

                dev_err(&pdev->dev, "Error in getting vec count [rc=%d]\n", rc);

                return rc;
        }

        /* min == max == nr_vecs: require exactly the advertised vector count. */
        rc = pci_alloc_irq_vectors(pdev, nr_vecs, nr_vecs, PCI_IRQ_MSIX);
        if (rc < 0) {
                dev_err(&pdev->dev, "Error in alloc MSI-X vecs [rc=%d]\n", rc);

                return rc;
        }

        /*
         * This IRQ gets triggered every time the PCI device responds to a
         * command request. The reply is then retrieved, reading from the MMIO
         * space of the PCI device.
         */
        rc = request_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_reply_handler,
                         0, "enclave_cmd", ne_pci_dev);
        if (rc < 0) {
                dev_err(&pdev->dev, "Error in request irq reply [rc=%d]\n", rc);

                goto free_irq_vectors;
        }

        ne_pci_dev->event_wq = create_singlethread_workqueue("ne_pci_dev_wq");
        if (!ne_pci_dev->event_wq) {
                rc = -ENOMEM;

                dev_err(&pdev->dev, "Cannot get wq for dev events [rc=%d]\n", rc);

                goto free_reply_irq_vec;
        }

        /* The work must be initialized before the event IRQ can queue it. */
        INIT_WORK(&ne_pci_dev->notify_work, ne_event_work_handler);

        /*
         * This IRQ gets triggered every time any enclave's state changes. Its
         * handler then scans for the changes and propagates them to the user
         * space.
         */
        rc = request_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_event_handler,
                         0, "enclave_evt", ne_pci_dev);
        if (rc < 0) {
                dev_err(&pdev->dev, "Error in request irq event [rc=%d]\n", rc);

                goto destroy_wq;
        }

        return 0;

        /* Unwind in reverse order of the setup above. */
destroy_wq:
        destroy_workqueue(ne_pci_dev->event_wq);
free_reply_irq_vec:
        free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
free_irq_vectors:
        pci_free_irq_vectors(pdev);

        return rc;
}
 365
 366/**
 367 * ne_teardown_msix() - Teardown MSI-X vectors for the PCI device.
 368 * @pdev:       PCI device to teardown the MSI-X for.
 369 *
 370 * Context: Process context.
 371 */
 372static void ne_teardown_msix(struct pci_dev *pdev)
 373{
 374        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 375
 376        free_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_pci_dev);
 377
 378        flush_work(&ne_pci_dev->notify_work);
 379        flush_workqueue(ne_pci_dev->event_wq);
 380        destroy_workqueue(ne_pci_dev->event_wq);
 381
 382        free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
 383
 384        pci_free_irq_vectors(pdev);
 385}
 386
 387/**
 388 * ne_pci_dev_enable() - Select the PCI device version and enable it.
 389 * @pdev:       PCI device to select version for and then enable.
 390 *
 391 * Context: Process context.
 392 * Return:
 393 * * 0 on success.
 394 * * Negative return value on failure.
 395 */
 396static int ne_pci_dev_enable(struct pci_dev *pdev)
 397{
 398        u8 dev_enable_reply = 0;
 399        u16 dev_version_reply = 0;
 400        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 401
 402        iowrite16(NE_VERSION_MAX, ne_pci_dev->iomem_base + NE_VERSION);
 403
 404        dev_version_reply = ioread16(ne_pci_dev->iomem_base + NE_VERSION);
 405        if (dev_version_reply != NE_VERSION_MAX) {
 406                dev_err(&pdev->dev, "Error in pci dev version cmd\n");
 407
 408                return -EIO;
 409        }
 410
 411        iowrite8(NE_ENABLE_ON, ne_pci_dev->iomem_base + NE_ENABLE);
 412
 413        dev_enable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
 414        if (dev_enable_reply != NE_ENABLE_ON) {
 415                dev_err(&pdev->dev, "Error in pci dev enable cmd\n");
 416
 417                return -EIO;
 418        }
 419
 420        return 0;
 421}
 422
 423/**
 424 * ne_pci_dev_disable() - Disable the PCI device.
 425 * @pdev:       PCI device to disable.
 426 *
 427 * Context: Process context.
 428 */
 429static void ne_pci_dev_disable(struct pci_dev *pdev)
 430{
 431        u8 dev_disable_reply = 0;
 432        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 433        const unsigned int sleep_time = 10; /* 10 ms */
 434        unsigned int sleep_time_count = 0;
 435
 436        iowrite8(NE_ENABLE_OFF, ne_pci_dev->iomem_base + NE_ENABLE);
 437
 438        /*
 439         * Check for NE_ENABLE_OFF in a loop, to handle cases when the device
 440         * state is not immediately set to disabled and going through a
 441         * transitory state of disabling.
 442         */
 443        while (sleep_time_count < NE_DEFAULT_TIMEOUT_MSECS) {
 444                dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
 445                if (dev_disable_reply == NE_ENABLE_OFF)
 446                        return;
 447
 448                msleep_interruptible(sleep_time);
 449                sleep_time_count += sleep_time;
 450        }
 451
 452        dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
 453        if (dev_disable_reply != NE_ENABLE_OFF)
 454                dev_err(&pdev->dev, "Error in pci dev disable cmd\n");
 455}
 456
/**
 * ne_pci_probe() - Probe function for the NE PCI device.
 * @pdev:       PCI device to match with the NE PCI driver.
 * @id :        PCI device id table associated with the NE PCI driver.
 *
 * Acquires all resources in order (device enable, regions, MMIO mapping,
 * MSI-X, device enable handshake, misc device) and unwinds them in reverse
 * order via the goto chain on any failure.
 *
 * Context: Process context.
 * Return:
 * * 0 on success.
 * * Negative return value on failure.
 */
static int ne_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
        struct ne_pci_dev *ne_pci_dev = NULL;
        int rc = -EINVAL;

        ne_pci_dev = kzalloc(sizeof(*ne_pci_dev), GFP_KERNEL);
        if (!ne_pci_dev)
                return -ENOMEM;

        rc = pci_enable_device(pdev);
        if (rc < 0) {
                dev_err(&pdev->dev, "Error in pci dev enable [rc=%d]\n", rc);

                goto free_ne_pci_dev;
        }

        pci_set_master(pdev);

        rc = pci_request_regions_exclusive(pdev, "nitro_enclaves");
        if (rc < 0) {
                dev_err(&pdev->dev, "Error in pci request regions [rc=%d]\n", rc);

                goto disable_pci_dev;
        }

        /* Map the MMIO region used for device commands and replies. */
        ne_pci_dev->iomem_base = pci_iomap(pdev, PCI_BAR_NE, 0);
        if (!ne_pci_dev->iomem_base) {
                rc = -ENOMEM;

                dev_err(&pdev->dev, "Error in pci iomap [rc=%d]\n", rc);

                goto release_pci_regions;
        }

        /* drvdata must be set before ne_setup_msix(), which reads it back. */
        pci_set_drvdata(pdev, ne_pci_dev);

        rc = ne_setup_msix(pdev);
        if (rc < 0) {
                dev_err(&pdev->dev, "Error in pci dev msix setup [rc=%d]\n", rc);

                goto iounmap_pci_bar;
        }

        /* Bring the device to a known disabled state before enabling it. */
        ne_pci_dev_disable(pdev);

        rc = ne_pci_dev_enable(pdev);
        if (rc < 0) {
                dev_err(&pdev->dev, "Error in ne_pci_dev enable [rc=%d]\n", rc);

                goto teardown_msix;
        }

        /* Initialize the command/reply and enclave bookkeeping state. */
        atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
        init_waitqueue_head(&ne_pci_dev->cmd_reply_wait_q);
        INIT_LIST_HEAD(&ne_pci_dev->enclaves_list);
        mutex_init(&ne_pci_dev->enclaves_list_mutex);
        mutex_init(&ne_pci_dev->pci_dev_mutex);
        ne_pci_dev->pdev = pdev;

        ne_devs.ne_pci_dev = ne_pci_dev;

        /* Register the misc device last, once the driver is fully set up. */
        rc = misc_register(ne_devs.ne_misc_dev);
        if (rc < 0) {
                dev_err(&pdev->dev, "Error in misc dev register [rc=%d]\n", rc);

                goto disable_ne_pci_dev;
        }

        return 0;

        /* Unwind in reverse order of the setup above. */
disable_ne_pci_dev:
        ne_devs.ne_pci_dev = NULL;
        ne_pci_dev_disable(pdev);
teardown_msix:
        ne_teardown_msix(pdev);
iounmap_pci_bar:
        pci_set_drvdata(pdev, NULL);
        pci_iounmap(pdev, ne_pci_dev->iomem_base);
release_pci_regions:
        pci_release_regions(pdev);
disable_pci_dev:
        pci_disable_device(pdev);
free_ne_pci_dev:
        kfree(ne_pci_dev);

        return rc;
}
 554
/**
 * ne_pci_remove() - Remove function for the NE PCI device.
 * @pdev:       PCI device associated with the NE PCI driver.
 *
 * Tears down in reverse order of ne_pci_probe(): deregister the misc device
 * first so user space can no longer reach the driver, then disable the
 * device, free the IRQ/workqueue resources, unmap the MMIO region and
 * release the PCI resources.
 *
 * Context: Process context.
 */
static void ne_pci_remove(struct pci_dev *pdev)
{
        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);

        misc_deregister(ne_devs.ne_misc_dev);

        ne_devs.ne_pci_dev = NULL;

        ne_pci_dev_disable(pdev);

        ne_teardown_msix(pdev);

        pci_set_drvdata(pdev, NULL);

        /* ne_pci_dev still holds the mapping; drvdata was just cleared. */
        pci_iounmap(pdev, ne_pci_dev->iomem_base);

        pci_release_regions(pdev);

        pci_disable_device(pdev);

        kfree(ne_pci_dev);
}
 583
 584/**
 585 * ne_pci_shutdown() - Shutdown function for the NE PCI device.
 586 * @pdev:       PCI device associated with the NE PCI driver.
 587 *
 588 * Context: Process context.
 589 */
 590static void ne_pci_shutdown(struct pci_dev *pdev)
 591{
 592        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 593
 594        if (!ne_pci_dev)
 595                return;
 596
 597        misc_deregister(ne_devs.ne_misc_dev);
 598
 599        ne_devs.ne_pci_dev = NULL;
 600
 601        ne_pci_dev_disable(pdev);
 602
 603        ne_teardown_msix(pdev);
 604
 605        pci_set_drvdata(pdev, NULL);
 606
 607        pci_iounmap(pdev, ne_pci_dev->iomem_base);
 608
 609        pci_release_regions(pdev);
 610
 611        pci_disable_device(pdev);
 612
 613        kfree(ne_pci_dev);
 614}
 615
/*
 * TODO: Add suspend / resume functions for power management w/ CONFIG_PM, if
 * needed.
 */
/*
 * NE PCI device driver. Non-static — presumably registered from the misc
 * device part of the driver (ne_misc_dev); confirm against ne_pci_dev.h.
 */
struct pci_driver ne_pci_driver = {
        .name           = "nitro_enclaves",
        .id_table       = ne_pci_ids,
        .probe          = ne_pci_probe,
        .remove         = ne_pci_remove,
        .shutdown       = ne_pci_shutdown,
};
 628