linux/drivers/misc/habanalabs/common/habanalabs_drv.c
<<
>>
Prefs
   1// SPDX-License-Identifier: GPL-2.0
   2
   3/*
   4 * Copyright 2016-2019 HabanaLabs, Ltd.
   5 * All Rights Reserved.
   6 *
   7 */
   8
   9#define pr_fmt(fmt)             "habanalabs: " fmt
  10
  11#include "habanalabs.h"
  12
  13#include <linux/pci.h>
  14#include <linux/module.h>
  15
/* Author string reported through MODULE_AUTHOR() below */
#define HL_DRIVER_AUTHOR        "HabanaLabs Kernel Driver Team"

/* Description string reported through MODULE_DESCRIPTION() below */
#define HL_DRIVER_DESC          "Driver for HabanaLabs's AI Accelerators"

MODULE_AUTHOR(HL_DRIVER_AUTHOR);
MODULE_DESCRIPTION(HL_DRIVER_DESC);
MODULE_LICENSE("GPL v2");
  23
/* Char-device major, taken from the region allocated in hl_init() */
static int hl_major;
/* Device class created in hl_init() and handed to hl_device_init() */
static struct class *hl_class;
/* Maps a minor number to its struct hl_device; looked up on open */
static DEFINE_IDR(hl_devs_idr);
/* Serializes every access to hl_devs_idr */
static DEFINE_MUTEX(hl_devs_idr_lock);
  28
/*
 * Module parameters. Both are exposed with mode 0444 and are copied into
 * each device in create_hdev(): timeout_locked is converted to
 * hdev->timeout_jiffies (0 selects MAX_SCHEDULE_TIMEOUT) and
 * reset_on_lockup is stored in hdev->reset_on_lockup.
 */
static int timeout_locked = 5;
static int reset_on_lockup = 1;

module_param(timeout_locked, int, 0444);
MODULE_PARM_DESC(timeout_locked,
        "Device lockup timeout in seconds (0 = disabled, default 5s)");

module_param(reset_on_lockup, int, 0444);
MODULE_PARM_DESC(reset_on_lockup,
        "Do device reset on lockup (0 = no, 1 = yes, default yes)");
  39
/* PCI vendor id shared by all devices this driver binds to */
#define PCI_VENDOR_ID_HABANALABS        0x1da3

/* PCI device ids; get_asic_type() translates these to an ASIC type */
#define PCI_IDS_GOYA                    0x0001
#define PCI_IDS_GAUDI                   0x1000

/* Match table used by hl_pci_driver; terminated by an all-zero entry */
static const struct pci_device_id ids[] = {
        { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
        { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
        { 0, }
};
MODULE_DEVICE_TABLE(pci, ids);
  51
  52/*
  53 * get_asic_type - translate device id to asic type
  54 *
  55 * @device: id of the PCI device
  56 *
  57 * Translate device id to asic type.
  58 * In case of unidentified device, return -1
  59 */
  60static enum hl_asic_type get_asic_type(u16 device)
  61{
  62        enum hl_asic_type asic_type;
  63
  64        switch (device) {
  65        case PCI_IDS_GOYA:
  66                asic_type = ASIC_GOYA;
  67                break;
  68        case PCI_IDS_GAUDI:
  69                asic_type = ASIC_GAUDI;
  70                break;
  71        default:
  72                asic_type = ASIC_INVALID;
  73                break;
  74        }
  75
  76        return asic_type;
  77}
  78
  79/*
  80 * hl_device_open - open function for habanalabs device
  81 *
  82 * @inode: pointer to inode structure
  83 * @filp: pointer to file structure
  84 *
  85 * Called when process opens an habanalabs device.
  86 */
  87int hl_device_open(struct inode *inode, struct file *filp)
  88{
  89        struct hl_device *hdev;
  90        struct hl_fpriv *hpriv;
  91        int rc;
  92
  93        mutex_lock(&hl_devs_idr_lock);
  94        hdev = idr_find(&hl_devs_idr, iminor(inode));
  95        mutex_unlock(&hl_devs_idr_lock);
  96
  97        if (!hdev) {
  98                pr_err("Couldn't find device %d:%d\n",
  99                        imajor(inode), iminor(inode));
 100                return -ENXIO;
 101        }
 102
 103        hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
 104        if (!hpriv)
 105                return -ENOMEM;
 106
 107        hpriv->hdev = hdev;
 108        filp->private_data = hpriv;
 109        hpriv->filp = filp;
 110        mutex_init(&hpriv->restore_phase_mutex);
 111        kref_init(&hpriv->refcount);
 112        nonseekable_open(inode, filp);
 113
 114        hl_cb_mgr_init(&hpriv->cb_mgr);
 115        hl_ctx_mgr_init(&hpriv->ctx_mgr);
 116
 117        hpriv->taskpid = find_get_pid(current->pid);
 118
 119        mutex_lock(&hdev->fpriv_list_lock);
 120
 121        if (hl_device_disabled_or_in_reset(hdev)) {
 122                dev_err_ratelimited(hdev->dev,
 123                        "Can't open %s because it is disabled or in reset\n",
 124                        dev_name(hdev->dev));
 125                rc = -EPERM;
 126                goto out_err;
 127        }
 128
 129        if (hdev->in_debug) {
 130                dev_err_ratelimited(hdev->dev,
 131                        "Can't open %s because it is being debugged by another user\n",
 132                        dev_name(hdev->dev));
 133                rc = -EPERM;
 134                goto out_err;
 135        }
 136
 137        if (hdev->compute_ctx) {
 138                dev_dbg_ratelimited(hdev->dev,
 139                        "Can't open %s because another user is working on it\n",
 140                        dev_name(hdev->dev));
 141                rc = -EBUSY;
 142                goto out_err;
 143        }
 144
 145        rc = hl_ctx_create(hdev, hpriv);
 146        if (rc) {
 147                dev_err(hdev->dev, "Failed to create context %d\n", rc);
 148                goto out_err;
 149        }
 150
 151        /* Device is IDLE at this point so it is legal to change PLLs.
 152         * There is no need to check anything because if the PLL is
 153         * already HIGH, the set function will return without doing
 154         * anything
 155         */
 156        hl_device_set_frequency(hdev, PLL_HIGH);
 157
 158        list_add(&hpriv->dev_node, &hdev->fpriv_list);
 159        mutex_unlock(&hdev->fpriv_list_lock);
 160
 161        hl_debugfs_add_file(hpriv);
 162
 163        return 0;
 164
 165out_err:
 166        mutex_unlock(&hdev->fpriv_list_lock);
 167
 168        hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
 169        hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
 170        filp->private_data = NULL;
 171        mutex_destroy(&hpriv->restore_phase_mutex);
 172        put_pid(hpriv->taskpid);
 173
 174        kfree(hpriv);
 175
 176        return rc;
 177}
 178
 179int hl_device_open_ctrl(struct inode *inode, struct file *filp)
 180{
 181        struct hl_device *hdev;
 182        struct hl_fpriv *hpriv;
 183        int rc;
 184
 185        mutex_lock(&hl_devs_idr_lock);
 186        hdev = idr_find(&hl_devs_idr, iminor(inode));
 187        mutex_unlock(&hl_devs_idr_lock);
 188
 189        if (!hdev) {
 190                pr_err("Couldn't find device %d:%d\n",
 191                        imajor(inode), iminor(inode));
 192                return -ENXIO;
 193        }
 194
 195        hpriv = kzalloc(sizeof(*hpriv), GFP_KERNEL);
 196        if (!hpriv)
 197                return -ENOMEM;
 198
 199        mutex_lock(&hdev->fpriv_list_lock);
 200
 201        if (hl_device_disabled_or_in_reset(hdev)) {
 202                dev_err_ratelimited(hdev->dev_ctrl,
 203                        "Can't open %s because it is disabled or in reset\n",
 204                        dev_name(hdev->dev_ctrl));
 205                rc = -EPERM;
 206                goto out_err;
 207        }
 208
 209        list_add(&hpriv->dev_node, &hdev->fpriv_list);
 210        mutex_unlock(&hdev->fpriv_list_lock);
 211
 212        hpriv->hdev = hdev;
 213        filp->private_data = hpriv;
 214        hpriv->filp = filp;
 215        hpriv->is_control = true;
 216        nonseekable_open(inode, filp);
 217
 218        hpriv->taskpid = find_get_pid(current->pid);
 219
 220        return 0;
 221
 222out_err:
 223        mutex_unlock(&hdev->fpriv_list_lock);
 224        kfree(hpriv);
 225        return rc;
 226}
 227
 228static void set_driver_behavior_per_device(struct hl_device *hdev)
 229{
 230        hdev->mmu_enable = 1;
 231        hdev->cpu_enable = 1;
 232        hdev->fw_loading = 1;
 233        hdev->cpu_queues_enable = 1;
 234        hdev->heartbeat = 1;
 235        hdev->clock_gating_mask = ULONG_MAX;
 236
 237        hdev->reset_pcilink = 0;
 238        hdev->axi_drain = 0;
 239        hdev->sram_scrambler_enable = 1;
 240        hdev->dram_scrambler_enable = 1;
 241        hdev->bmc_enable = 1;
 242        hdev->hard_reset_on_fw_events = 1;
 243}
 244
 245/*
 246 * create_hdev - create habanalabs device instance
 247 *
 248 * @dev: will hold the pointer to the new habanalabs device structure
 249 * @pdev: pointer to the pci device
 250 * @asic_type: in case of simulator device, which device is it
 251 * @minor: in case of simulator device, the minor of the device
 252 *
 253 * Allocate memory for habanalabs device and initialize basic fields
 254 * Identify the ASIC type
 255 * Allocate ID (minor) for the device (only for real devices)
 256 */
 257int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
 258                enum hl_asic_type asic_type, int minor)
 259{
 260        struct hl_device *hdev;
 261        int rc, main_id, ctrl_id = 0;
 262
 263        *dev = NULL;
 264
 265        hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
 266        if (!hdev)
 267                return -ENOMEM;
 268
 269        /* First, we must find out which ASIC are we handling. This is needed
 270         * to configure the behavior of the driver (kernel parameters)
 271         */
 272        if (pdev) {
 273                hdev->asic_type = get_asic_type(pdev->device);
 274                if (hdev->asic_type == ASIC_INVALID) {
 275                        dev_err(&pdev->dev, "Unsupported ASIC\n");
 276                        rc = -ENODEV;
 277                        goto free_hdev;
 278                }
 279        } else {
 280                hdev->asic_type = asic_type;
 281        }
 282
 283        hdev->major = hl_major;
 284        hdev->reset_on_lockup = reset_on_lockup;
 285        hdev->pldm = 0;
 286
 287        set_driver_behavior_per_device(hdev);
 288
 289        if (timeout_locked)
 290                hdev->timeout_jiffies = msecs_to_jiffies(timeout_locked * 1000);
 291        else
 292                hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
 293
 294        hdev->disabled = true;
 295        hdev->pdev = pdev; /* can be NULL in case of simulator device */
 296
 297        /* Set default DMA mask to 32 bits */
 298        hdev->dma_mask = 32;
 299
 300        mutex_lock(&hl_devs_idr_lock);
 301
 302        /* Always save 2 numbers, 1 for main device and 1 for control.
 303         * They must be consecutive
 304         */
 305        main_id = idr_alloc(&hl_devs_idr, hdev, 0, HL_MAX_MINORS,
 306                                GFP_KERNEL);
 307
 308        if (main_id >= 0)
 309                ctrl_id = idr_alloc(&hl_devs_idr, hdev, main_id + 1,
 310                                        main_id + 2, GFP_KERNEL);
 311
 312        mutex_unlock(&hl_devs_idr_lock);
 313
 314        if ((main_id < 0) || (ctrl_id < 0)) {
 315                if ((main_id == -ENOSPC) || (ctrl_id == -ENOSPC))
 316                        pr_err("too many devices in the system\n");
 317
 318                if (main_id >= 0) {
 319                        mutex_lock(&hl_devs_idr_lock);
 320                        idr_remove(&hl_devs_idr, main_id);
 321                        mutex_unlock(&hl_devs_idr_lock);
 322                }
 323
 324                rc = -EBUSY;
 325                goto free_hdev;
 326        }
 327
 328        hdev->id = main_id;
 329        hdev->id_control = ctrl_id;
 330
 331        *dev = hdev;
 332
 333        return 0;
 334
 335free_hdev:
 336        kfree(hdev);
 337        return rc;
 338}
 339
 340/*
 341 * destroy_hdev - destroy habanalabs device instance
 342 *
 343 * @dev: pointer to the habanalabs device structure
 344 *
 345 */
 346void destroy_hdev(struct hl_device *hdev)
 347{
 348        /* Remove device from the device list */
 349        mutex_lock(&hl_devs_idr_lock);
 350        idr_remove(&hl_devs_idr, hdev->id);
 351        idr_remove(&hl_devs_idr, hdev->id_control);
 352        mutex_unlock(&hl_devs_idr_lock);
 353
 354        kfree(hdev);
 355}
 356
 357static int hl_pmops_suspend(struct device *dev)
 358{
 359        struct hl_device *hdev = dev_get_drvdata(dev);
 360
 361        pr_debug("Going to suspend PCI device\n");
 362
 363        if (!hdev) {
 364                pr_err("device pointer is NULL in suspend\n");
 365                return 0;
 366        }
 367
 368        return hl_device_suspend(hdev);
 369}
 370
 371static int hl_pmops_resume(struct device *dev)
 372{
 373        struct hl_device *hdev = dev_get_drvdata(dev);
 374
 375        pr_debug("Going to resume PCI device\n");
 376
 377        if (!hdev) {
 378                pr_err("device pointer is NULL in resume\n");
 379                return 0;
 380        }
 381
 382        return hl_device_resume(hdev);
 383}
 384
 385/*
 386 * hl_pci_probe - probe PCI habanalabs devices
 387 *
 388 * @pdev: pointer to pci device
 389 * @id: pointer to pci device id structure
 390 *
 391 * Standard PCI probe function for habanalabs device.
 392 * Create a new habanalabs device and initialize it according to the
 393 * device's type
 394 */
 395static int hl_pci_probe(struct pci_dev *pdev,
 396                                const struct pci_device_id *id)
 397{
 398        struct hl_device *hdev;
 399        int rc;
 400
 401        dev_info(&pdev->dev, HL_NAME
 402                 " device found [%04x:%04x] (rev %x)\n",
 403                 (int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
 404
 405        rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1);
 406        if (rc)
 407                return rc;
 408
 409        pci_set_drvdata(pdev, hdev);
 410
 411        rc = hl_device_init(hdev, hl_class);
 412        if (rc) {
 413                dev_err(&pdev->dev, "Fatal error during habanalabs device init\n");
 414                rc = -ENODEV;
 415                goto disable_device;
 416        }
 417
 418        return 0;
 419
 420disable_device:
 421        pci_set_drvdata(pdev, NULL);
 422        destroy_hdev(hdev);
 423
 424        return rc;
 425}
 426
 427/*
 428 * hl_pci_remove - remove PCI habanalabs devices
 429 *
 430 * @pdev: pointer to pci device
 431 *
 432 * Standard PCI remove function for habanalabs device
 433 */
 434static void hl_pci_remove(struct pci_dev *pdev)
 435{
 436        struct hl_device *hdev;
 437
 438        hdev = pci_get_drvdata(pdev);
 439        if (!hdev)
 440                return;
 441
 442        hl_device_fini(hdev);
 443        pci_set_drvdata(pdev, NULL);
 444
 445        destroy_hdev(hdev);
 446}
 447
/* Power-management callbacks; only suspend and resume are populated */
static const struct dev_pm_ops hl_pm_ops = {
        .suspend = hl_pmops_suspend,
        .resume = hl_pmops_resume,
};
 452
 453static struct pci_driver hl_pci_driver = {
 454        .name = HL_NAME,
 455        .id_table = ids,
 456        .probe = hl_pci_probe,
 457        .remove = hl_pci_remove,
 458        .driver.pm = &hl_pm_ops,
 459};
 460
 461/*
 462 * hl_init - Initialize the habanalabs kernel driver
 463 */
 464static int __init hl_init(void)
 465{
 466        int rc;
 467        dev_t dev;
 468
 469        pr_info("loading driver\n");
 470
 471        rc = alloc_chrdev_region(&dev, 0, HL_MAX_MINORS, HL_NAME);
 472        if (rc < 0) {
 473                pr_err("unable to get major\n");
 474                return rc;
 475        }
 476
 477        hl_major = MAJOR(dev);
 478
 479        hl_class = class_create(THIS_MODULE, HL_NAME);
 480        if (IS_ERR(hl_class)) {
 481                pr_err("failed to allocate class\n");
 482                rc = PTR_ERR(hl_class);
 483                goto remove_major;
 484        }
 485
 486        hl_debugfs_init();
 487
 488        rc = pci_register_driver(&hl_pci_driver);
 489        if (rc) {
 490                pr_err("failed to register pci device\n");
 491                goto remove_debugfs;
 492        }
 493
 494        pr_debug("driver loaded\n");
 495
 496        return 0;
 497
 498remove_debugfs:
 499        hl_debugfs_fini();
 500        class_destroy(hl_class);
 501remove_major:
 502        unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
 503        return rc;
 504}
 505
 506/*
 507 * hl_exit - Release all resources of the habanalabs kernel driver
 508 */
 509static void __exit hl_exit(void)
 510{
 511        pci_unregister_driver(&hl_pci_driver);
 512
 513        /*
 514         * Removing debugfs must be after all devices or simulator devices
 515         * have been removed because otherwise we get a bug in the
 516         * debugfs module for referencing NULL objects
 517         */
 518        hl_debugfs_fini();
 519
 520        class_destroy(hl_class);
 521        unregister_chrdev_region(MKDEV(hl_major, 0), HL_MAX_MINORS);
 522
 523        idr_destroy(&hl_devs_idr);
 524
 525        pr_debug("driver removed\n");
 526}
 527
/* Register hl_init()/hl_exit() as the module's entry and exit points */
module_init(hl_init);
module_exit(hl_exit);
 530