linux/drivers/virt/nitro_enclaves/ne_pci_dev.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Copyright 2020-2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
   4 */
   5
   6/**
   7 * DOC: Nitro Enclaves (NE) PCI device driver.
   8 */
   9
  10#include <linux/delay.h>
  11#include <linux/device.h>
  12#include <linux/list.h>
  13#include <linux/module.h>
  14#include <linux/mutex.h>
  15#include <linux/nitro_enclaves.h>
  16#include <linux/pci.h>
  17#include <linux/types.h>
  18#include <linux/wait.h>
  19
  20#include "ne_misc_dev.h"
  21#include "ne_pci_dev.h"
  22
/**
 * NE_DEFAULT_TIMEOUT_MSECS - Default timeout, in milliseconds, to wait for a
 *                            reply from the NE PCI device. It is also the
 *                            upper bound used when polling for the device to
 *                            report the disabled state.
 */
#define NE_DEFAULT_TIMEOUT_MSECS        (120000) /* 120 sec */
  28
/*
 * PCI IDs handled by this driver. The table holds a single vendor / device
 * pair and is closed by a zeroed sentinel entry.
 */
static const struct pci_device_id ne_pci_ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, PCI_DEVICE_ID_NE) },
        { 0, }
};

/* Publish the ID table as the module's PCI device table. */
MODULE_DEVICE_TABLE(pci, ne_pci_ids);
  35
  36/**
  37 * ne_submit_request() - Submit command request to the PCI device based on the
  38 *                       command type.
  39 * @pdev:               PCI device to send the command to.
  40 * @cmd_type:           Command type of the request sent to the PCI device.
  41 * @cmd_request:        Command request payload.
  42 * @cmd_request_size:   Size of the command request payload.
  43 *
  44 * Context: Process context. This function is called with the ne_pci_dev mutex held.
  45 */
  46static void ne_submit_request(struct pci_dev *pdev, enum ne_pci_dev_cmd_type cmd_type,
  47                              void *cmd_request, size_t cmd_request_size)
  48{
  49        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
  50
  51        memcpy_toio(ne_pci_dev->iomem_base + NE_SEND_DATA, cmd_request, cmd_request_size);
  52
  53        iowrite32(cmd_type, ne_pci_dev->iomem_base + NE_COMMAND);
  54}
  55
  56/**
  57 * ne_retrieve_reply() - Retrieve reply from the PCI device.
  58 * @pdev:               PCI device to receive the reply from.
  59 * @cmd_reply:          Command reply payload.
  60 * @cmd_reply_size:     Size of the command reply payload.
  61 *
  62 * Context: Process context. This function is called with the ne_pci_dev mutex held.
  63 */
  64static void ne_retrieve_reply(struct pci_dev *pdev, struct ne_pci_dev_cmd_reply *cmd_reply,
  65                              size_t cmd_reply_size)
  66{
  67        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
  68
  69        memcpy_fromio(cmd_reply, ne_pci_dev->iomem_base + NE_RECV_DATA, cmd_reply_size);
  70}
  71
  72/**
  73 * ne_wait_for_reply() - Wait for a reply of a PCI device command.
  74 * @pdev:       PCI device for which a reply is waited.
  75 *
  76 * Context: Process context. This function is called with the ne_pci_dev mutex held.
  77 * Return:
  78 * * 0 on success.
  79 * * Negative return value on failure.
  80 */
  81static int ne_wait_for_reply(struct pci_dev *pdev)
  82{
  83        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
  84        int rc = -EINVAL;
  85
  86        /*
  87         * TODO: Update to _interruptible and handle interrupted wait event
  88         * e.g. -ERESTARTSYS, incoming signals + update timeout, if needed.
  89         */
  90        rc = wait_event_timeout(ne_pci_dev->cmd_reply_wait_q,
  91                                atomic_read(&ne_pci_dev->cmd_reply_avail) != 0,
  92                                msecs_to_jiffies(NE_DEFAULT_TIMEOUT_MSECS));
  93        if (!rc)
  94                return -ETIMEDOUT;
  95
  96        return 0;
  97}
  98
  99int ne_do_request(struct pci_dev *pdev, enum ne_pci_dev_cmd_type cmd_type,
 100                  void *cmd_request, size_t cmd_request_size,
 101                  struct ne_pci_dev_cmd_reply *cmd_reply, size_t cmd_reply_size)
 102{
 103        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 104        int rc = -EINVAL;
 105
 106        if (cmd_type <= INVALID_CMD || cmd_type >= MAX_CMD) {
 107                dev_err_ratelimited(&pdev->dev, "Invalid cmd type=%u\n", cmd_type);
 108
 109                return -EINVAL;
 110        }
 111
 112        if (!cmd_request) {
 113                dev_err_ratelimited(&pdev->dev, "Null cmd request for cmd type=%u\n",
 114                                    cmd_type);
 115
 116                return -EINVAL;
 117        }
 118
 119        if (cmd_request_size > NE_SEND_DATA_SIZE) {
 120                dev_err_ratelimited(&pdev->dev, "Invalid req size=%zu for cmd type=%u\n",
 121                                    cmd_request_size, cmd_type);
 122
 123                return -EINVAL;
 124        }
 125
 126        if (!cmd_reply) {
 127                dev_err_ratelimited(&pdev->dev, "Null cmd reply for cmd type=%u\n",
 128                                    cmd_type);
 129
 130                return -EINVAL;
 131        }
 132
 133        if (cmd_reply_size > NE_RECV_DATA_SIZE) {
 134                dev_err_ratelimited(&pdev->dev, "Invalid reply size=%zu for cmd type=%u\n",
 135                                    cmd_reply_size, cmd_type);
 136
 137                return -EINVAL;
 138        }
 139
 140        /*
 141         * Use this mutex so that the PCI device handles one command request at
 142         * a time.
 143         */
 144        mutex_lock(&ne_pci_dev->pci_dev_mutex);
 145
 146        atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
 147
 148        ne_submit_request(pdev, cmd_type, cmd_request, cmd_request_size);
 149
 150        rc = ne_wait_for_reply(pdev);
 151        if (rc < 0) {
 152                dev_err_ratelimited(&pdev->dev, "Error in wait for reply for cmd type=%u [rc=%d]\n",
 153                                    cmd_type, rc);
 154
 155                goto unlock_mutex;
 156        }
 157
 158        ne_retrieve_reply(pdev, cmd_reply, cmd_reply_size);
 159
 160        atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
 161
 162        if (cmd_reply->rc < 0) {
 163                rc = cmd_reply->rc;
 164
 165                dev_err_ratelimited(&pdev->dev, "Error in cmd process logic, cmd type=%u [rc=%d]\n",
 166                                    cmd_type, rc);
 167
 168                goto unlock_mutex;
 169        }
 170
 171        rc = 0;
 172
 173unlock_mutex:
 174        mutex_unlock(&ne_pci_dev->pci_dev_mutex);
 175
 176        return rc;
 177}
 178
 179/**
 180 * ne_reply_handler() - Interrupt handler for retrieving a reply matching a
 181 *                      request sent to the PCI device for enclave lifetime
 182 *                      management.
 183 * @irq:        Received interrupt for a reply sent by the PCI device.
 184 * @args:       PCI device private data structure.
 185 *
 186 * Context: Interrupt context.
 187 * Return:
 188 * * IRQ_HANDLED on handled interrupt.
 189 */
 190static irqreturn_t ne_reply_handler(int irq, void *args)
 191{
 192        struct ne_pci_dev *ne_pci_dev = (struct ne_pci_dev *)args;
 193
 194        atomic_set(&ne_pci_dev->cmd_reply_avail, 1);
 195
 196        /* TODO: Update to _interruptible. */
 197        wake_up(&ne_pci_dev->cmd_reply_wait_q);
 198
 199        return IRQ_HANDLED;
 200}
 201
 202/**
 203 * ne_event_work_handler() - Work queue handler for notifying enclaves on a
 204 *                           state change received by the event interrupt
 205 *                           handler.
 206 * @work:       Item containing the NE PCI device for which an out-of-band event
 207 *              was issued.
 208 *
 209 * An out-of-band event is being issued by the Nitro Hypervisor when at least
 210 * one enclave is changing state without client interaction.
 211 *
 212 * Context: Work queue context.
 213 */
 214static void ne_event_work_handler(struct work_struct *work)
 215{
 216        struct ne_pci_dev_cmd_reply cmd_reply = {};
 217        struct ne_enclave *ne_enclave = NULL;
 218        struct ne_pci_dev *ne_pci_dev =
 219                container_of(work, struct ne_pci_dev, notify_work);
 220        struct pci_dev *pdev = ne_pci_dev->pdev;
 221        int rc = -EINVAL;
 222        struct slot_info_req slot_info_req = {};
 223
 224        mutex_lock(&ne_pci_dev->enclaves_list_mutex);
 225
 226        /*
 227         * Iterate over all enclaves registered for the Nitro Enclaves
 228         * PCI device and determine for which enclave(s) the out-of-band event
 229         * is corresponding to.
 230         */
 231        list_for_each_entry(ne_enclave, &ne_pci_dev->enclaves_list, enclave_list_entry) {
 232                mutex_lock(&ne_enclave->enclave_info_mutex);
 233
 234                /*
 235                 * Enclaves that were never started cannot receive out-of-band
 236                 * events.
 237                 */
 238                if (ne_enclave->state != NE_STATE_RUNNING)
 239                        goto unlock;
 240
 241                slot_info_req.slot_uid = ne_enclave->slot_uid;
 242
 243                rc = ne_do_request(pdev, SLOT_INFO,
 244                                   &slot_info_req, sizeof(slot_info_req),
 245                                   &cmd_reply, sizeof(cmd_reply));
 246                if (rc < 0)
 247                        dev_err(&pdev->dev, "Error in slot info [rc=%d]\n", rc);
 248
 249                /* Notify enclave process that the enclave state changed. */
 250                if (ne_enclave->state != cmd_reply.state) {
 251                        ne_enclave->state = cmd_reply.state;
 252
 253                        ne_enclave->has_event = true;
 254
 255                        wake_up_interruptible(&ne_enclave->eventq);
 256                }
 257
 258unlock:
 259                 mutex_unlock(&ne_enclave->enclave_info_mutex);
 260        }
 261
 262        mutex_unlock(&ne_pci_dev->enclaves_list_mutex);
 263}
 264
 265/**
 266 * ne_event_handler() - Interrupt handler for PCI device out-of-band events.
 267 *                      This interrupt does not supply any data in the MMIO
 268 *                      region. It notifies a change in the state of any of
 269 *                      the launched enclaves.
 270 * @irq:        Received interrupt for an out-of-band event.
 271 * @args:       PCI device private data structure.
 272 *
 273 * Context: Interrupt context.
 274 * Return:
 275 * * IRQ_HANDLED on handled interrupt.
 276 */
 277static irqreturn_t ne_event_handler(int irq, void *args)
 278{
 279        struct ne_pci_dev *ne_pci_dev = (struct ne_pci_dev *)args;
 280
 281        queue_work(ne_pci_dev->event_wq, &ne_pci_dev->notify_work);
 282
 283        return IRQ_HANDLED;
 284}
 285
 286/**
 287 * ne_setup_msix() - Setup MSI-X vectors for the PCI device.
 288 * @pdev:       PCI device to setup the MSI-X for.
 289 *
 290 * Context: Process context.
 291 * Return:
 292 * * 0 on success.
 293 * * Negative return value on failure.
 294 */
 295static int ne_setup_msix(struct pci_dev *pdev)
 296{
 297        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 298        int nr_vecs = 0;
 299        int rc = -EINVAL;
 300
 301        nr_vecs = pci_msix_vec_count(pdev);
 302        if (nr_vecs < 0) {
 303                rc = nr_vecs;
 304
 305                dev_err(&pdev->dev, "Error in getting vec count [rc=%d]\n", rc);
 306
 307                return rc;
 308        }
 309
 310        rc = pci_alloc_irq_vectors(pdev, nr_vecs, nr_vecs, PCI_IRQ_MSIX);
 311        if (rc < 0) {
 312                dev_err(&pdev->dev, "Error in alloc MSI-X vecs [rc=%d]\n", rc);
 313
 314                return rc;
 315        }
 316
 317        /*
 318         * This IRQ gets triggered every time the PCI device responds to a
 319         * command request. The reply is then retrieved, reading from the MMIO
 320         * space of the PCI device.
 321         */
 322        rc = request_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_reply_handler,
 323                         0, "enclave_cmd", ne_pci_dev);
 324        if (rc < 0) {
 325                dev_err(&pdev->dev, "Error in request irq reply [rc=%d]\n", rc);
 326
 327                goto free_irq_vectors;
 328        }
 329
 330        ne_pci_dev->event_wq = create_singlethread_workqueue("ne_pci_dev_wq");
 331        if (!ne_pci_dev->event_wq) {
 332                rc = -ENOMEM;
 333
 334                dev_err(&pdev->dev, "Cannot get wq for dev events [rc=%d]\n", rc);
 335
 336                goto free_reply_irq_vec;
 337        }
 338
 339        INIT_WORK(&ne_pci_dev->notify_work, ne_event_work_handler);
 340
 341        /*
 342         * This IRQ gets triggered every time any enclave's state changes. Its
 343         * handler then scans for the changes and propagates them to the user
 344         * space.
 345         */
 346        rc = request_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_event_handler,
 347                         0, "enclave_evt", ne_pci_dev);
 348        if (rc < 0) {
 349                dev_err(&pdev->dev, "Error in request irq event [rc=%d]\n", rc);
 350
 351                goto destroy_wq;
 352        }
 353
 354        return 0;
 355
 356destroy_wq:
 357        destroy_workqueue(ne_pci_dev->event_wq);
 358free_reply_irq_vec:
 359        free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
 360free_irq_vectors:
 361        pci_free_irq_vectors(pdev);
 362
 363        return rc;
 364}
 365
 366/**
 367 * ne_teardown_msix() - Teardown MSI-X vectors for the PCI device.
 368 * @pdev:       PCI device to teardown the MSI-X for.
 369 *
 370 * Context: Process context.
 371 */
 372static void ne_teardown_msix(struct pci_dev *pdev)
 373{
 374        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 375
 376        free_irq(pci_irq_vector(pdev, NE_VEC_EVENT), ne_pci_dev);
 377
 378        flush_work(&ne_pci_dev->notify_work);
 379        destroy_workqueue(ne_pci_dev->event_wq);
 380
 381        free_irq(pci_irq_vector(pdev, NE_VEC_REPLY), ne_pci_dev);
 382
 383        pci_free_irq_vectors(pdev);
 384}
 385
 386/**
 387 * ne_pci_dev_enable() - Select the PCI device version and enable it.
 388 * @pdev:       PCI device to select version for and then enable.
 389 *
 390 * Context: Process context.
 391 * Return:
 392 * * 0 on success.
 393 * * Negative return value on failure.
 394 */
 395static int ne_pci_dev_enable(struct pci_dev *pdev)
 396{
 397        u8 dev_enable_reply = 0;
 398        u16 dev_version_reply = 0;
 399        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 400
 401        iowrite16(NE_VERSION_MAX, ne_pci_dev->iomem_base + NE_VERSION);
 402
 403        dev_version_reply = ioread16(ne_pci_dev->iomem_base + NE_VERSION);
 404        if (dev_version_reply != NE_VERSION_MAX) {
 405                dev_err(&pdev->dev, "Error in pci dev version cmd\n");
 406
 407                return -EIO;
 408        }
 409
 410        iowrite8(NE_ENABLE_ON, ne_pci_dev->iomem_base + NE_ENABLE);
 411
 412        dev_enable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
 413        if (dev_enable_reply != NE_ENABLE_ON) {
 414                dev_err(&pdev->dev, "Error in pci dev enable cmd\n");
 415
 416                return -EIO;
 417        }
 418
 419        return 0;
 420}
 421
 422/**
 423 * ne_pci_dev_disable() - Disable the PCI device.
 424 * @pdev:       PCI device to disable.
 425 *
 426 * Context: Process context.
 427 */
 428static void ne_pci_dev_disable(struct pci_dev *pdev)
 429{
 430        u8 dev_disable_reply = 0;
 431        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 432        const unsigned int sleep_time = 10; /* 10 ms */
 433        unsigned int sleep_time_count = 0;
 434
 435        iowrite8(NE_ENABLE_OFF, ne_pci_dev->iomem_base + NE_ENABLE);
 436
 437        /*
 438         * Check for NE_ENABLE_OFF in a loop, to handle cases when the device
 439         * state is not immediately set to disabled and going through a
 440         * transitory state of disabling.
 441         */
 442        while (sleep_time_count < NE_DEFAULT_TIMEOUT_MSECS) {
 443                dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
 444                if (dev_disable_reply == NE_ENABLE_OFF)
 445                        return;
 446
 447                msleep_interruptible(sleep_time);
 448                sleep_time_count += sleep_time;
 449        }
 450
 451        dev_disable_reply = ioread8(ne_pci_dev->iomem_base + NE_ENABLE);
 452        if (dev_disable_reply != NE_ENABLE_OFF)
 453                dev_err(&pdev->dev, "Error in pci dev disable cmd\n");
 454}
 455
 456/**
 457 * ne_pci_probe() - Probe function for the NE PCI device.
 458 * @pdev:       PCI device to match with the NE PCI driver.
 459 * @id :        PCI device id table associated with the NE PCI driver.
 460 *
 461 * Context: Process context.
 462 * Return:
 463 * * 0 on success.
 464 * * Negative return value on failure.
 465 */
 466static int ne_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 467{
 468        struct ne_pci_dev *ne_pci_dev = NULL;
 469        int rc = -EINVAL;
 470
 471        ne_pci_dev = kzalloc(sizeof(*ne_pci_dev), GFP_KERNEL);
 472        if (!ne_pci_dev)
 473                return -ENOMEM;
 474
 475        rc = pci_enable_device(pdev);
 476        if (rc < 0) {
 477                dev_err(&pdev->dev, "Error in pci dev enable [rc=%d]\n", rc);
 478
 479                goto free_ne_pci_dev;
 480        }
 481
 482        pci_set_master(pdev);
 483
 484        rc = pci_request_regions_exclusive(pdev, "nitro_enclaves");
 485        if (rc < 0) {
 486                dev_err(&pdev->dev, "Error in pci request regions [rc=%d]\n", rc);
 487
 488                goto disable_pci_dev;
 489        }
 490
 491        ne_pci_dev->iomem_base = pci_iomap(pdev, PCI_BAR_NE, 0);
 492        if (!ne_pci_dev->iomem_base) {
 493                rc = -ENOMEM;
 494
 495                dev_err(&pdev->dev, "Error in pci iomap [rc=%d]\n", rc);
 496
 497                goto release_pci_regions;
 498        }
 499
 500        pci_set_drvdata(pdev, ne_pci_dev);
 501
 502        rc = ne_setup_msix(pdev);
 503        if (rc < 0) {
 504                dev_err(&pdev->dev, "Error in pci dev msix setup [rc=%d]\n", rc);
 505
 506                goto iounmap_pci_bar;
 507        }
 508
 509        ne_pci_dev_disable(pdev);
 510
 511        rc = ne_pci_dev_enable(pdev);
 512        if (rc < 0) {
 513                dev_err(&pdev->dev, "Error in ne_pci_dev enable [rc=%d]\n", rc);
 514
 515                goto teardown_msix;
 516        }
 517
 518        atomic_set(&ne_pci_dev->cmd_reply_avail, 0);
 519        init_waitqueue_head(&ne_pci_dev->cmd_reply_wait_q);
 520        INIT_LIST_HEAD(&ne_pci_dev->enclaves_list);
 521        mutex_init(&ne_pci_dev->enclaves_list_mutex);
 522        mutex_init(&ne_pci_dev->pci_dev_mutex);
 523        ne_pci_dev->pdev = pdev;
 524
 525        ne_devs.ne_pci_dev = ne_pci_dev;
 526
 527        rc = misc_register(ne_devs.ne_misc_dev);
 528        if (rc < 0) {
 529                dev_err(&pdev->dev, "Error in misc dev register [rc=%d]\n", rc);
 530
 531                goto disable_ne_pci_dev;
 532        }
 533
 534        return 0;
 535
 536disable_ne_pci_dev:
 537        ne_devs.ne_pci_dev = NULL;
 538        ne_pci_dev_disable(pdev);
 539teardown_msix:
 540        ne_teardown_msix(pdev);
 541iounmap_pci_bar:
 542        pci_set_drvdata(pdev, NULL);
 543        pci_iounmap(pdev, ne_pci_dev->iomem_base);
 544release_pci_regions:
 545        pci_release_regions(pdev);
 546disable_pci_dev:
 547        pci_disable_device(pdev);
 548free_ne_pci_dev:
 549        kfree(ne_pci_dev);
 550
 551        return rc;
 552}
 553
 554/**
 555 * ne_pci_remove() - Remove function for the NE PCI device.
 556 * @pdev:       PCI device associated with the NE PCI driver.
 557 *
 558 * Context: Process context.
 559 */
 560static void ne_pci_remove(struct pci_dev *pdev)
 561{
 562        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 563
 564        misc_deregister(ne_devs.ne_misc_dev);
 565
 566        ne_devs.ne_pci_dev = NULL;
 567
 568        ne_pci_dev_disable(pdev);
 569
 570        ne_teardown_msix(pdev);
 571
 572        pci_set_drvdata(pdev, NULL);
 573
 574        pci_iounmap(pdev, ne_pci_dev->iomem_base);
 575
 576        pci_release_regions(pdev);
 577
 578        pci_disable_device(pdev);
 579
 580        kfree(ne_pci_dev);
 581}
 582
 583/**
 584 * ne_pci_shutdown() - Shutdown function for the NE PCI device.
 585 * @pdev:       PCI device associated with the NE PCI driver.
 586 *
 587 * Context: Process context.
 588 */
 589static void ne_pci_shutdown(struct pci_dev *pdev)
 590{
 591        struct ne_pci_dev *ne_pci_dev = pci_get_drvdata(pdev);
 592
 593        if (!ne_pci_dev)
 594                return;
 595
 596        misc_deregister(ne_devs.ne_misc_dev);
 597
 598        ne_devs.ne_pci_dev = NULL;
 599
 600        ne_pci_dev_disable(pdev);
 601
 602        ne_teardown_msix(pdev);
 603
 604        pci_set_drvdata(pdev, NULL);
 605
 606        pci_iounmap(pdev, ne_pci_dev->iomem_base);
 607
 608        pci_release_regions(pdev);
 609
 610        pci_disable_device(pdev);
 611
 612        kfree(ne_pci_dev);
 613}
 614
/*
 * TODO: Add suspend / resume functions for power management w/ CONFIG_PM, if
 * needed.
 */
/*
 * NE PCI device driver: ties the probe, remove and shutdown callbacks defined
 * in this file to the devices listed in ne_pci_ids.
 *
 * NOTE(review): the symbol is not static, presumably because the driver is
 * registered from another compilation unit - confirm against the caller.
 */
struct pci_driver ne_pci_driver = {
        .name           = "nitro_enclaves",
        .id_table       = ne_pci_ids,
        .probe          = ne_pci_probe,
        .remove         = ne_pci_remove,
        .shutdown       = ne_pci_shutdown,
};
 627