linux/drivers/misc/habanalabs/common/device.c
// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#define pr_fmt(fmt)                     "habanalabs: " fmt

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/pci.h>
#include <linux/hwmon.h>

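/*
 * hl_device_status - get the current status of a habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Derive the status from the device state flags, checked in order of
 * severity: in-reset, needs-reset, malfunction (disabled), still in
 * device creation, and finally operational.
 */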
enum hl_device_status hl_device_status(struct hl_device *hdev)
{
        enum hl_device_status status;

        if (atomic_read(&hdev->in_reset))
                status = HL_DEVICE_STATUS_IN_RESET;
        else if (hdev->needs_reset)
                status = HL_DEVICE_STATUS_NEEDS_RESET;
        else if (hdev->disabled)
                status = HL_DEVICE_STATUS_MALFUNCTION;
        else if (!hdev->init_done)
                status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
        else
                status = HL_DEVICE_STATUS_OPERATIONAL;

        return status;
}

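/*
 * hl_device_operational - check if the device can accept new work
 *
 * @hdev: pointer to habanalabs device structure
 * @status: optional out parameter, filled with the current device status
 *
 * Returns true if the device is operational or still in device creation,
 * false if it is in reset, needs a reset or has malfunctioned.
 */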
bool hl_device_operational(struct hl_device *hdev,
                enum hl_device_status *status)
{
        enum hl_device_status current_status;

        current_status = hl_device_status(hdev);
        if (status)
                *status = current_status;

        switch (current_status) {
        case HL_DEVICE_STATUS_IN_RESET:
        case HL_DEVICE_STATUS_MALFUNCTION:
        case HL_DEVICE_STATUS_NEEDS_RESET:
                return false;
        case HL_DEVICE_STATUS_OPERATIONAL:
        case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
        default:
                return true;
        }
}

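/*
 * hpriv_release - release function for the hl_fpriv refcount
 *
 * @ref: pointer to the embedded kref of the file private data
 *
 * Called when the last reference to a user's file private data is dropped.
 * Checks that the device is idle, removes the user from the device's
 * file-private list and, if configured to do so, triggers a device reset.
 */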
static void hpriv_release(struct kref *ref)
{
        u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
        bool device_is_idle = true;
        struct hl_fpriv *hpriv;
        struct hl_device *hdev;

        hpriv = container_of(ref, struct hl_fpriv, refcount);

        hdev = hpriv->hdev;

        put_pid(hpriv->taskpid);

        hl_debugfs_remove_file(hpriv);

        mutex_destroy(&hpriv->restore_phase_mutex);

        if ((!hdev->pldm) && (hdev->pdev) &&
                        (!hdev->asic_funcs->is_device_idle(hdev,
                                idle_mask,
                                HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL))) {
                dev_err(hdev->dev,
                        "device not idle after user context is closed (0x%llx_%llx)\n",
                        idle_mask[1], idle_mask[0]);

                device_is_idle = false;
        }

        /* We need to remove the user from the list to make sure the reset process won't
         * try to kill the user process. If we got here, it means there are no more
         * driver/device resources that the user process is occupying, so there is no
         * need to kill it.
         *
         * However, we can't set compute_ctx to NULL at this stage. This prevents a race
         * between the release and opening the device again. We don't want to let a user
         * open the device while a reset is about to happen.
         */
        mutex_lock(&hdev->fpriv_list_lock);
        list_del(&hpriv->dev_node);
        mutex_unlock(&hdev->fpriv_list_lock);

        if ((hdev->reset_if_device_not_idle && !device_is_idle)
                        || hdev->reset_upon_device_release)
                hl_device_reset(hdev, HL_RESET_DEVICE_RELEASE);

        /* Now we can mark compute_ctx as empty. Even if a reset is running in a
         * different thread, we don't care because in_reset is already set, so any
         * attempt to open the device will fail on that, even if compute_ctx is NULL.
         */
        mutex_lock(&hdev->fpriv_list_lock);
        hdev->compute_ctx = NULL;
        mutex_unlock(&hdev->fpriv_list_lock);

        kfree(hpriv);
}

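/*
 * hl_hpriv_get - increment the reference count of the file private data
 *
 * @hpriv: pointer to the file private data
 */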
void hl_hpriv_get(struct hl_fpriv *hpriv)
{
        kref_get(&hpriv->refcount);
}

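/*
 * hl_hpriv_put - decrement the reference count of the file private data
 *
 * @hpriv: pointer to the file private data
 *
 * Returns 1 if this was the last reference and hpriv_release() was called,
 * 0 otherwise.
 */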
int hl_hpriv_put(struct hl_fpriv *hpriv)
{
        return kref_put(&hpriv->refcount, hpriv_release);
}

/*
 * hl_device_release - release function for habanalabs device
 *
 * @inode: pointer to inode structure
 * @filp: pointer to file structure
 *
 * Called when process closes a habanalabs device
 */
static int hl_device_release(struct inode *inode, struct file *filp)
{
        struct hl_fpriv *hpriv = filp->private_data;
        struct hl_device *hdev = hpriv->hdev;

        filp->private_data = NULL;

        if (!hdev) {
                pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
                put_pid(hpriv->taskpid);
                return 0;
        }

        /* Each pending user interrupt holds the user's context, hence we
         * must release them all before calling hl_ctx_mgr_fini().
         */
        hl_release_pending_user_interrupts(hpriv->hdev);

        hl_cb_mgr_fini(hdev, &hpriv->cb_mgr);
        hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);

        if (!hl_hpriv_put(hpriv))
                dev_notice(hdev->dev,
                        "User process closed FD but device still in use\n");

        hdev->last_open_session_duration_jif =
                jiffies - hdev->last_successful_open_jif;

        return 0;
}

static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
{
        struct hl_fpriv *hpriv = filp->private_data;
        struct hl_device *hdev = hpriv->hdev;

        filp->private_data = NULL;

        if (!hdev) {
                pr_err("Closing FD after device was removed\n");
                goto out;
        }

        mutex_lock(&hdev->fpriv_list_lock);
        list_del(&hpriv->dev_node);
        mutex_unlock(&hdev->fpriv_list_lock);
out:
        put_pid(hpriv->taskpid);

        kfree(hpriv);

        return 0;
}

/*
 * hl_mmap - mmap function for habanalabs device
 *
 * @filp: pointer to file structure
 * @vma: pointer to vm_area_struct of the process
 *
 * Called when process does an mmap on habanalabs device. Call the device's
 * mmap function at the end of the common code.
 */
static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
{
        struct hl_fpriv *hpriv = filp->private_data;
        struct hl_device *hdev = hpriv->hdev;
        unsigned long vm_pgoff;

        if (!hdev) {
                pr_err_ratelimited("Trying to mmap after device was removed! Please close FD\n");
                return -ENODEV;
        }

        vm_pgoff = vma->vm_pgoff;
        vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);

        switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
        case HL_MMAP_TYPE_CB:
                return hl_cb_mmap(hpriv, vma);

        case HL_MMAP_TYPE_BLOCK:
                return hl_hw_block_mmap(hpriv, vma);
        }

        return -EINVAL;
}

static const struct file_operations hl_ops = {
        .owner = THIS_MODULE,
        .open = hl_device_open,
        .release = hl_device_release,
        .mmap = hl_mmap,
        .unlocked_ioctl = hl_ioctl,
        .compat_ioctl = hl_ioctl
};

static const struct file_operations hl_ctrl_ops = {
        .owner = THIS_MODULE,
        .open = hl_device_open_ctrl,
        .release = hl_device_release_ctrl,
        .unlocked_ioctl = hl_ioctl_control,
        .compat_ioctl = hl_ioctl_control
};

static void device_release_func(struct device *dev)
{
        kfree(dev);
}

/*
 * device_init_cdev - Initialize cdev and device for habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 * @hclass: pointer to the class object of the device
 * @minor: minor number of the specific device
 * @fops: file operations to install for this device
 * @name: name of the device as it will appear in the filesystem
 * @cdev: pointer to the char device object that will be initialized
 * @dev: pointer to the device object that will be initialized
 *
 * Initialize a cdev and a Linux device for a habanalabs device.
 */
static int device_init_cdev(struct hl_device *hdev, struct class *hclass,
                                int minor, const struct file_operations *fops,
                                char *name, struct cdev *cdev,
                                struct device **dev)
{
        cdev_init(cdev, fops);
        cdev->owner = THIS_MODULE;

        *dev = kzalloc(sizeof(**dev), GFP_KERNEL);
        if (!*dev)
                return -ENOMEM;

        device_initialize(*dev);
        (*dev)->devt = MKDEV(hdev->major, minor);
        (*dev)->class = hclass;
        (*dev)->release = device_release_func;
        dev_set_drvdata(*dev, hdev);
        dev_set_name(*dev, "%s", name);

        return 0;
}

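/*
 * device_cdev_sysfs_add - add char devices and sysfs nodes
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Add the compute and control char devices to the system, then create the
 * sysfs nodes. On failure, any device that was already added is deleted
 * again.
 */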
static int device_cdev_sysfs_add(struct hl_device *hdev)
{
        int rc;

        rc = cdev_device_add(&hdev->cdev, hdev->dev);
        if (rc) {
                dev_err(hdev->dev,
                        "failed to add a char device to the system\n");
                return rc;
        }

        rc = cdev_device_add(&hdev->cdev_ctrl, hdev->dev_ctrl);
        if (rc) {
                dev_err(hdev->dev,
                        "failed to add a control char device to the system\n");
                goto delete_cdev_device;
        }

        /* hl_sysfs_init() must be done after adding the device to the system */
        rc = hl_sysfs_init(hdev);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize sysfs\n");
                goto delete_ctrl_cdev_device;
        }

        hdev->cdev_sysfs_created = true;

        return 0;

delete_ctrl_cdev_device:
        cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
delete_cdev_device:
        cdev_device_del(&hdev->cdev, hdev->dev);
        return rc;
}

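/*
 * device_cdev_sysfs_del - remove sysfs nodes and char devices
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Mirror of device_cdev_sysfs_add(). The final put_device() calls drop the
 * references taken in device_init_cdev() and free the device objects
 * through device_release_func().
 */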
static void device_cdev_sysfs_del(struct hl_device *hdev)
{
        if (!hdev->cdev_sysfs_created)
                goto put_devices;

        hl_sysfs_fini(hdev);
        cdev_device_del(&hdev->cdev_ctrl, hdev->dev_ctrl);
        cdev_device_del(&hdev->cdev, hdev->dev);

put_devices:
        put_device(hdev->dev);
        put_device(hdev->dev_ctrl);
}

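/*
 * device_hard_reset_pending - work function of the dedicated reset thread
 *
 * @work: embedded delayed work item of the reset work structure
 *
 * Perform a hard reset from a dedicated work queue context. If the device
 * is busy and tear-down has not started yet, the work re-queues itself and
 * tries again later.
 */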
static void device_hard_reset_pending(struct work_struct *work)
{
        struct hl_device_reset_work *device_reset_work =
                container_of(work, struct hl_device_reset_work,
                                reset_work.work);
        struct hl_device *hdev = device_reset_work->hdev;
        u32 flags;
        int rc;

        flags = HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD;

        if (device_reset_work->fw_reset)
                flags |= HL_RESET_FW;

        rc = hl_device_reset(hdev, flags);
        if ((rc == -EBUSY) && !hdev->device_fini_pending) {
                dev_info(hdev->dev,
                        "Could not reset device. Will try again in %u seconds",
                        HL_PENDING_RESET_PER_SEC);

                queue_delayed_work(device_reset_work->wq,
                        &device_reset_work->reset_work,
                        msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
        }
}

/*
 * device_early_init - do some early initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Install the relevant function pointers and call the early_init function,
 * if such a function exists
 */
static int device_early_init(struct hl_device *hdev)
{
        int i, rc;
        char workq_name[32];

        switch (hdev->asic_type) {
        case ASIC_GOYA:
                goya_set_asic_funcs(hdev);
                strscpy(hdev->asic_name, "GOYA", sizeof(hdev->asic_name));
                break;
        case ASIC_GAUDI:
                gaudi_set_asic_funcs(hdev);
                strscpy(hdev->asic_name, "GAUDI", sizeof(hdev->asic_name));
                break;
        case ASIC_GAUDI_SEC:
                gaudi_set_asic_funcs(hdev);
                strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name));
                break;
        default:
                dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
                        hdev->asic_type);
                return -EINVAL;
        }

        rc = hdev->asic_funcs->early_init(hdev);
        if (rc)
                return rc;

        rc = hl_asid_init(hdev);
        if (rc)
                goto early_fini;

        if (hdev->asic_prop.completion_queues_count) {
                hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
                                sizeof(*hdev->cq_wq),
                                GFP_KERNEL);
                if (!hdev->cq_wq) {
                        rc = -ENOMEM;
                        goto asid_fini;
                }
        }

        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
                snprintf(workq_name, 32, "hl-free-jobs-%u", (u32) i);
                hdev->cq_wq[i] = create_singlethread_workqueue(workq_name);
                if (hdev->cq_wq[i] == NULL) {
                        dev_err(hdev->dev, "Failed to allocate CQ workqueue\n");
                        rc = -ENOMEM;
                        goto free_cq_wq;
                }
        }

        hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
        if (hdev->eq_wq == NULL) {
                dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
                rc = -ENOMEM;
                goto free_cq_wq;
        }

        hdev->sob_reset_wq = alloc_workqueue("hl-sob-reset", WQ_UNBOUND, 0);
        if (!hdev->sob_reset_wq) {
                dev_err(hdev->dev,
                        "Failed to allocate SOB reset workqueue\n");
                rc = -ENOMEM;
                goto free_eq_wq;
        }

        hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info),
                                        GFP_KERNEL);
        if (!hdev->hl_chip_info) {
                rc = -ENOMEM;
                goto free_sob_reset_wq;
        }

        rc = hl_mmu_if_set_funcs(hdev);
        if (rc)
                goto free_chip_info;

        hl_cb_mgr_init(&hdev->kernel_cb_mgr);

        hdev->device_reset_work.wq =
                        create_singlethread_workqueue("hl_device_reset");
        if (!hdev->device_reset_work.wq) {
                rc = -ENOMEM;
                dev_err(hdev->dev, "Failed to create device reset WQ\n");
                goto free_cb_mgr;
        }

        INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work,
                        device_hard_reset_pending);
        hdev->device_reset_work.hdev = hdev;
        hdev->device_fini_pending = 0;

        mutex_init(&hdev->send_cpu_message_lock);
        mutex_init(&hdev->debug_lock);
        INIT_LIST_HEAD(&hdev->cs_mirror_list);
        spin_lock_init(&hdev->cs_mirror_lock);
        INIT_LIST_HEAD(&hdev->fpriv_list);
        mutex_init(&hdev->fpriv_list_lock);
        atomic_set(&hdev->in_reset, 0);

        return 0;

free_cb_mgr:
        hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);
free_chip_info:
        kfree(hdev->hl_chip_info);
free_sob_reset_wq:
        destroy_workqueue(hdev->sob_reset_wq);
free_eq_wq:
        destroy_workqueue(hdev->eq_wq);
free_cq_wq:
        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
                if (hdev->cq_wq[i])
                        destroy_workqueue(hdev->cq_wq[i]);
        kfree(hdev->cq_wq);
asid_fini:
        hl_asid_fini(hdev);
early_fini:
        if (hdev->asic_funcs->early_fini)
                hdev->asic_funcs->early_fini(hdev);

        return rc;
}

/*
 * device_early_fini - finalize all that was done in device_early_init
 *
 * @hdev: pointer to habanalabs device structure
 *
 */
static void device_early_fini(struct hl_device *hdev)
{
        int i;

        mutex_destroy(&hdev->debug_lock);
        mutex_destroy(&hdev->send_cpu_message_lock);

        mutex_destroy(&hdev->fpriv_list_lock);

        hl_cb_mgr_fini(hdev, &hdev->kernel_cb_mgr);

        kfree(hdev->hl_chip_info);

        destroy_workqueue(hdev->sob_reset_wq);
        destroy_workqueue(hdev->eq_wq);
        destroy_workqueue(hdev->device_reset_work.wq);

        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
                destroy_workqueue(hdev->cq_wq[i]);
        kfree(hdev->cq_wq);

        hl_asid_fini(hdev);

        if (hdev->asic_funcs->early_fini)
                hdev->asic_funcs->early_fini(hdev);
}

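/*
 * set_freq_to_low_job - periodic work that lowers the device frequency
 *
 * @work: embedded delayed work item
 *
 * If no compute context is open, drop the PLL profile to low. The work
 * re-arms itself so the check repeats periodically.
 */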
static void set_freq_to_low_job(struct work_struct *work)
{
        struct hl_device *hdev = container_of(work, struct hl_device,
                                                work_freq.work);

        mutex_lock(&hdev->fpriv_list_lock);

        if (!hdev->compute_ctx)
                hl_device_set_frequency(hdev, PLL_LOW);

        mutex_unlock(&hdev->fpriv_list_lock);

        schedule_delayed_work(&hdev->work_freq,
                        usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));
}

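/*
 * hl_device_heartbeat - periodic work that checks the device is alive
 *
 * @work: embedded delayed work item
 *
 * Send a heartbeat packet to the device CPU. On failure, trigger a hard
 * reset; otherwise re-arm the work for the next heartbeat interval.
 */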
static void hl_device_heartbeat(struct work_struct *work)
{
        struct hl_device *hdev = container_of(work, struct hl_device,
                                                work_heartbeat.work);

        if (!hl_device_operational(hdev, NULL))
                goto reschedule;

        if (!hdev->asic_funcs->send_heartbeat(hdev))
                goto reschedule;

        dev_err(hdev->dev, "Device heartbeat failed!\n");
        hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_HEARTBEAT);

        return;

reschedule:
        /*
         * prev_reset_trigger tracks consecutive fatal h/w errors until the
         * first heartbeat immediately post reset.
         * If control reached here, then at least one heartbeat work has been
         * scheduled since the last reset/init cycle.
         * So if the device is not already in a reset cycle, reset
         * prev_reset_trigger, as no reset occurred with HL_RESET_FW_FATAL_ERR
         * status for at least one heartbeat. From this point the driver
         * restarts tracking future consecutive fatal errors.
         */
        if (!(atomic_read(&hdev->in_reset)))
                hdev->prev_reset_trigger = HL_RESET_TRIGGER_DEFAULT;

        schedule_delayed_work(&hdev->work_heartbeat,
                        usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
}

/*
 * device_late_init - do late initialization for the habanalabs device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Do the initialization that either needs the device H/W queues to be active
 * or needs to happen after all the rest of the initialization is finished
 */
static int device_late_init(struct hl_device *hdev)
{
        int rc;

        if (hdev->asic_funcs->late_init) {
                rc = hdev->asic_funcs->late_init(hdev);
                if (rc) {
                        dev_err(hdev->dev,
                                "failed late initialization for the H/W\n");
                        return rc;
                }
        }

        hdev->high_pll = hdev->asic_prop.high_pll;

        /* force setting to low frequency */
        hdev->curr_pll_profile = PLL_LOW;

        if (hdev->pm_mng_profile == PM_AUTO)
                hdev->asic_funcs->set_pll_profile(hdev, PLL_LOW);
        else
                hdev->asic_funcs->set_pll_profile(hdev, PLL_LAST);

        INIT_DELAYED_WORK(&hdev->work_freq, set_freq_to_low_job);
        schedule_delayed_work(&hdev->work_freq,
                        usecs_to_jiffies(HL_PLL_LOW_JOB_FREQ_USEC));

        if (hdev->heartbeat) {
                INIT_DELAYED_WORK(&hdev->work_heartbeat, hl_device_heartbeat);
                schedule_delayed_work(&hdev->work_heartbeat,
                                usecs_to_jiffies(HL_HEARTBEAT_PER_USEC));
        }

        hdev->late_init_done = true;

        return 0;
}

/*
 * device_late_fini - finalize all that was done in device_late_init
 *
 * @hdev: pointer to habanalabs device structure
 *
 */
static void device_late_fini(struct hl_device *hdev)
{
        if (!hdev->late_init_done)
                return;

        cancel_delayed_work_sync(&hdev->work_freq);
        if (hdev->heartbeat)
                cancel_delayed_work_sync(&hdev->work_heartbeat);

        if (hdev->asic_funcs->late_fini)
                hdev->asic_funcs->late_fini(hdev);

        hdev->late_init_done = false;
}

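/*
 * hl_device_utilization - get the device utilization as a percentage
 *
 * @hdev: pointer to habanalabs device structure
 * @utilization: out parameter for the utilization percentage (0-100)
 *
 * Compute how far the current power draw is between the idle (DC) power
 * and the maximum power, i.e.
 * (curr_power - dc_power) * 100 / (max_power - dc_power).
 */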
int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
{
        u64 max_power, curr_power, dc_power, dividend;
        int rc;

        max_power = hdev->asic_prop.max_power_default;
        dc_power = hdev->asic_prop.dc_power_default;
        rc = hl_fw_cpucp_power_get(hdev, &curr_power);

        if (rc)
                return rc;

        curr_power = clamp(curr_power, dc_power, max_power);

        dividend = (curr_power - dc_power) * 100;
        *utilization = (u32) div_u64(dividend, (max_power - dc_power));

        return 0;
}

/*
 * hl_device_set_frequency - set the frequency of the device
 *
 * @hdev: pointer to habanalabs device structure
 * @freq: the new frequency value
 *
 * Change the frequency if needed. This function has no protection against
 * concurrency, therefore it is assumed that the calling function has protected
 * itself against the case of calling this function from multiple threads with
 * different values
 *
 * Returns 0 if no change was done, otherwise returns 1
 */
int hl_device_set_frequency(struct hl_device *hdev, enum hl_pll_frequency freq)
{
        if ((hdev->pm_mng_profile == PM_MANUAL) ||
                        (hdev->curr_pll_profile == freq))
                return 0;

        dev_dbg(hdev->dev, "Changing device frequency to %s\n",
                freq == PLL_HIGH ? "high" : "low");

        hdev->asic_funcs->set_pll_profile(hdev, freq);

        hdev->curr_pll_profile = freq;

        return 1;
}

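/*
 * hl_device_set_debug_mode - enable or disable device debug mode
 *
 * @hdev: pointer to habanalabs device structure
 * @enable: true to enter debug mode, false to leave it
 *
 * Clock gating is disabled while the device is in debug mode and restored
 * when leaving it, unless a hard reset is pending, in which case the
 * device is not touched.
 */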
int hl_device_set_debug_mode(struct hl_device *hdev, bool enable)
{
        int rc = 0;

        mutex_lock(&hdev->debug_lock);

        if (!enable) {
                if (!hdev->in_debug) {
                        dev_err(hdev->dev,
                                "Failed to disable debug mode because device was not in debug mode\n");
                        rc = -EFAULT;
                        goto out;
                }

                if (!hdev->hard_reset_pending)
                        hdev->asic_funcs->halt_coresight(hdev);

                hdev->in_debug = 0;

                if (!hdev->hard_reset_pending)
                        hdev->asic_funcs->set_clock_gating(hdev);

                goto out;
        }

        if (hdev->in_debug) {
                dev_err(hdev->dev,
                        "Failed to enable debug mode because device is already in debug mode\n");
                rc = -EFAULT;
                goto out;
        }

        hdev->asic_funcs->disable_clock_gating(hdev);
        hdev->in_debug = 1;

out:
        mutex_unlock(&hdev->debug_lock);

        return rc;
}

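/*
 * take_release_locks - flush threads that hold the device's main locks
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Take and immediately release each lock, so any thread that was inside
 * the corresponding critical section when the device was disabled is
 * guaranteed to have left it.
 */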
static void take_release_locks(struct hl_device *hdev)
{
        /* Flush anyone that is inside the critical section of enqueue
         * jobs to the H/W
         */
        hdev->asic_funcs->hw_queues_lock(hdev);
        hdev->asic_funcs->hw_queues_unlock(hdev);

        /* Flush processes that are sending message to CPU */
        mutex_lock(&hdev->send_cpu_message_lock);
        mutex_unlock(&hdev->send_cpu_message_lock);

        /* Flush anyone that is inside device open */
        mutex_lock(&hdev->fpriv_list_lock);
        mutex_unlock(&hdev->fpriv_list_lock);
}

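/*
 * cleanup_resources - halt the device and release in-flight S/W resources
 *
 * @hdev: pointer to habanalabs device structure
 * @hard_reset: whether this is part of a hard reset
 * @fw_reset: whether the F/W is performing the reset
 *
 * Common tear-down path used when resetting the device.
 */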
static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset)
{
        if (hard_reset)
                device_late_fini(hdev);

        /*
         * Halt the engines and disable interrupts so we won't get any more
         * completions from H/W and we won't have any accesses from the
         * H/W to the host machine
         */
        hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset);

        /* Go over all the queues, release all CS and their jobs */
        hl_cs_rollback_all(hdev);

        /* Release all pending user interrupts, each pending user interrupt
         * holds a reference to user context
         */
        hl_release_pending_user_interrupts(hdev);
}

/*
 * hl_device_suspend - initiate device suspend
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Puts the hw in the suspend state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver suspend.
 */
int hl_device_suspend(struct hl_device *hdev)
{
        int rc;

        pci_save_state(hdev->pdev);

        /* Block future CS/VM/JOB completion operations */
        rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
        if (rc) {
                dev_err(hdev->dev, "Can't suspend while in reset\n");
                return -EIO;
        }

        /* This blocks all other stuff that is not blocked by in_reset */
        hdev->disabled = true;

        take_release_locks(hdev);

        rc = hdev->asic_funcs->suspend(hdev);
        if (rc)
                dev_err(hdev->dev,
                        "Failed to disable PCI access of device CPU\n");

        /* Shut down the device */
        pci_disable_device(hdev->pdev);
        pci_set_power_state(hdev->pdev, PCI_D3hot);

        return 0;
}

/*
 * hl_device_resume - initiate device resume
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Bring the hw back to operating state (all asics).
 * Returns 0 for success or an error on failure.
 * Called at driver resume.
 */
int hl_device_resume(struct hl_device *hdev)
{
        int rc;

        pci_set_power_state(hdev->pdev, PCI_D0);
        pci_restore_state(hdev->pdev);
        rc = pci_enable_device_mem(hdev->pdev);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to enable PCI device in resume\n");
                return rc;
        }

        pci_set_master(hdev->pdev);

        rc = hdev->asic_funcs->resume(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to resume device after suspend\n");
                goto disable_device;
        }

        hdev->disabled = false;
        atomic_set(&hdev->in_reset, 0);

        rc = hl_device_reset(hdev, HL_RESET_HARD);
        if (rc) {
                dev_err(hdev->dev, "Failed to reset device during resume\n");
                goto disable_device;
        }

        return 0;

disable_device:
        pci_clear_master(hdev->pdev);
        pci_disable_device(hdev->pdev);

        return rc;
}

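/*
 * device_kill_open_processes - force close of all open user processes
 *
 * @hdev: pointer to habanalabs device structure
 * @timeout: number of seconds to wait for processes to close, 0 for the
 *           default wait behavior
 *
 * Send SIGKILL to every process that still has the device open and wait
 * for the file descriptors to be released. Returns 0 when all processes
 * exited, -EBUSY if another trial is needed and -ETIME on give-up.
 */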
static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
{
        struct hl_fpriv *hpriv;
        struct task_struct *task = NULL;
        u32 pending_cnt;

        /* Giving time for user to close FD, and for processes that are inside
         * hl_device_open to finish
         */
        if (!list_empty(&hdev->fpriv_list))
                ssleep(1);

        if (timeout) {
                pending_cnt = timeout;
        } else {
                if (hdev->process_kill_trial_cnt) {
                        /* Processes have already been killed */
                        pending_cnt = 1;
                        goto wait_for_processes;
                } else {
                        /* Wait a small period after process kill */
                        pending_cnt = HL_PENDING_RESET_PER_SEC;
                }
        }

        mutex_lock(&hdev->fpriv_list_lock);

        /* This section must be protected because we are dereferencing
         * pointers that are freed if the process exits
         */
        list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) {
                task = get_pid_task(hpriv->taskpid, PIDTYPE_PID);
                if (task) {
                        dev_info(hdev->dev, "Killing user process pid=%d\n",
                                task_pid_nr(task));
                        send_sig(SIGKILL, task, 1);
                        usleep_range(1000, 10000);

                        put_task_struct(task);
                } else {
                        dev_warn(hdev->dev,
                                "Can't get task struct for PID so giving up on killing process\n");
                        mutex_unlock(&hdev->fpriv_list_lock);
                        return -ETIME;
                }
        }

        mutex_unlock(&hdev->fpriv_list_lock);

        /*
         * We killed the open users, but that doesn't mean they are closed.
         * It could be that they are running a long cleanup phase in the driver
         * e.g. MMU unmappings, or running other long teardown flow even before
         * our cleanup.
         * Therefore we need to wait again to make sure they are closed before
         * continuing with the reset.
         */

wait_for_processes:
        while ((!list_empty(&hdev->fpriv_list)) && (pending_cnt)) {
                dev_dbg(hdev->dev,
                        "Waiting for all unmap operations to finish before hard reset\n");

                pending_cnt--;

                ssleep(1);
        }

        /* All processes exited successfully */
        if (list_empty(&hdev->fpriv_list))
                return 0;

        /* Give up waiting for processes to exit */
        if (hdev->process_kill_trial_cnt == HL_PENDING_RESET_MAX_TRIALS)
                return -ETIME;

        hdev->process_kill_trial_cnt++;

        return -EBUSY;
}

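/*
 * device_disable_open_processes - detach open FDs from the device
 *
 * @hdev: pointer to habanalabs device structure
 *
 * Clear the hdev pointer of every open file private data, so a later close
 * of the FD won't touch a device that is being removed.
 */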
static void device_disable_open_processes(struct hl_device *hdev)
{
        struct hl_fpriv *hpriv;

        mutex_lock(&hdev->fpriv_list_lock);
        list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
                hpriv->hdev = NULL;
        mutex_unlock(&hdev->fpriv_list_lock);
}

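/*
 * handle_reset_trigger - record the reset cause and track repeated triggers
 *
 * @hdev: pointer to habanalabs device structure
 * @flags: reset flags of the current reset
 *
 * Store the first recorded reset cause, detect when the same trigger fires
 * twice in a row, and disable PCI access from the device F/W when a hard
 * reset is performed by the driver.
 */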
static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
{
        u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;

        /*
         * 'reset_cause' is being updated here, because getting here
         * means that it's the 1st time and the last time we're here
         * ('in_reset' makes sure of it). This makes sure that
         * 'reset_cause' will continue holding its 1st recorded reason!
         */
        if (flags & HL_RESET_HEARTBEAT) {
                hdev->curr_reset_cause = HL_RESET_CAUSE_HEARTBEAT;
                cur_reset_trigger = HL_RESET_HEARTBEAT;
        } else if (flags & HL_RESET_TDR) {
                hdev->curr_reset_cause = HL_RESET_CAUSE_TDR;
                cur_reset_trigger = HL_RESET_TDR;
        } else if (flags & HL_RESET_FW_FATAL_ERR) {
                hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
                cur_reset_trigger = HL_RESET_FW_FATAL_ERR;
        } else {
                hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
        }

        /*
         * If the reset cause is the same twice in a row, then
         * reset_trigger_repeated is set, and if this reset is due to a fatal
         * FW error the device is put in an unstable state.
         */
        if (hdev->prev_reset_trigger != cur_reset_trigger) {
                hdev->prev_reset_trigger = cur_reset_trigger;
                hdev->reset_trigger_repeated = 0;
        } else {
                hdev->reset_trigger_repeated = 1;
        }

        /* If the reset is due to heartbeat, the device CPU is not responsive,
         * in which case there is no point sending it a PCI disable message.
         *
         * If the F/W is performing the reset, there is no need to send it a
         * message to disable PCI access
         */
        if ((flags & HL_RESET_HARD) &&
                        !(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
                /* Disable PCI access from the device F/W so it won't send
                 * us additional interrupts. We disable MSI/MSI-X in
                 * the halt_engines function and we can't have the F/W
                 * sending us interrupts after that. We need to disable
                 * the access here because if the device is marked
                 * disabled, the message won't be sent. Also, in case
                 * of heartbeat, the device CPU is marked as disabled
                 * so this message won't be sent
                 */
                if (hl_fw_send_pci_access_msg(hdev,
                                CPUCP_PACKET_DISABLE_PCI_ACCESS))
                        dev_warn(hdev->dev,
                                "Failed to disable PCI access by F/W\n");
        }
}


/*
 * hl_device_reset - reset the device
 *
 * @hdev: pointer to habanalabs device structure
 * @flags: reset flags.
 *
 * Block future CS and wait for pending CS to be enqueued
 * Call ASIC H/W fini
 * Flush all completions
 * Re-initialize all internal data structures
 * Call ASIC H/W init, late_init
 * Test queues
 * Enable device
 *
 * Returns 0 for success or an error on failure.
 */
int hl_device_reset(struct hl_device *hdev, u32 flags)
{
        u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
        bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false;
        int i, rc;

        if (!hdev->init_done) {
                dev_err(hdev->dev,
                        "Can't reset before initialization is done\n");
                return 0;
        }

        hard_reset = !!(flags & HL_RESET_HARD);
        from_hard_reset_thread = !!(flags & HL_RESET_FROM_RESET_THREAD);
        fw_reset = !!(flags & HL_RESET_FW);

        if (!hard_reset && !hdev->supports_soft_reset) {
                hard_instead_soft = true;
                hard_reset = true;
        }

        if (hdev->reset_upon_device_release &&
                        (flags & HL_RESET_DEVICE_RELEASE)) {
                dev_dbg(hdev->dev,
                        "Perform %s-reset upon device release\n",
                        hard_reset ? "hard" : "soft");
                goto do_reset;
        }

        if (!hard_reset && !hdev->allow_inference_soft_reset) {
                hard_instead_soft = true;
                hard_reset = true;
        }

        if (hard_instead_soft)
                dev_dbg(hdev->dev, "Doing hard-reset instead of soft-reset\n");

do_reset:
        /* Re-entry of reset thread */
        if (from_hard_reset_thread && hdev->process_kill_trial_cnt)
                goto kill_processes;

        /*
         * Prevent concurrency in this function - only one reset should be
         * done at any given time. We only need to perform this if we didn't
         * get here from the dedicated hard reset thread
         */
        if (!from_hard_reset_thread) {
                /* Block future CS/VM/JOB completion operations */
                rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
                if (rc)
                        return 0;

                handle_reset_trigger(hdev, flags);

                /* This also blocks future CS/VM/JOB completion operations */
                hdev->disabled = true;

                take_release_locks(hdev);

                if (hard_reset)
                        dev_info(hdev->dev, "Going to reset device\n");
                else if (flags & HL_RESET_DEVICE_RELEASE)
                        dev_info(hdev->dev,
                                "Going to reset device after it was released by user\n");
                else
                        dev_info(hdev->dev,
                                "Going to reset compute engines of inference device\n");
        }

again:
        if ((hard_reset) && (!from_hard_reset_thread)) {
                hdev->hard_reset_pending = true;

                hdev->process_kill_trial_cnt = 0;

                hdev->device_reset_work.fw_reset = fw_reset;

                /*
                 * Because the reset function can't run from heartbeat work,
                 * we need to call the reset function from a dedicated work.
                 */
                queue_delayed_work(hdev->device_reset_work.wq,
                        &hdev->device_reset_work.reset_work, 0);

                return 0;
        }

        cleanup_resources(hdev, hard_reset, fw_reset);

kill_processes:
        if (hard_reset) {
                /* Kill processes here after CS rollback. This is because the
                 * process can't really exit until all its CSs are done, which
                 * is what we do in cs rollback
                 */
                rc = device_kill_open_processes(hdev, 0);

                if (rc == -EBUSY) {
                        if (hdev->device_fini_pending) {
                                dev_crit(hdev->dev,
                                        "Failed to kill all open processes, stopping hard reset\n");
                                goto out_err;
                        }

                        /* signal reset thread to reschedule */
                        return rc;
                }

                if (rc) {
                        dev_crit(hdev->dev,
                                "Failed to kill all open processes, stopping hard reset\n");
                        goto out_err;
                }

                /* Flush the Event queue workers to make sure no other thread is
                 * reading or writing to registers during the reset
                 */
                flush_workqueue(hdev->eq_wq);
        }

        /* Reset the H/W. It will be in idle state after this returns */
        hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);

        if (hard_reset) {
                hdev->fw_loader.linux_loaded = false;

                /* Release kernel context */
                if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
                        hdev->kernel_ctx = NULL;

                hl_vm_fini(hdev);
                hl_mmu_fini(hdev);
                hl_eq_reset(hdev, &hdev->event_queue);
        }

        /* Re-initialize PI,CI to 0 in all queues (hw queue, cq) */
        hl_hw_queue_reset(hdev, hard_reset);
        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
                hl_cq_reset(hdev, &hdev->completion_queue[i]);

        mutex_lock(&hdev->fpriv_list_lock);

        /* Make sure the context switch phase will run again */
        if (hdev->compute_ctx) {
                atomic_set(&hdev->compute_ctx->thread_ctx_switch_token, 1);
                hdev->compute_ctx->thread_ctx_switch_wait_token = 0;
        }

        mutex_unlock(&hdev->fpriv_list_lock);

        /* Finished tear-down, starting to re-initialize */

        if (hard_reset) {
                hdev->device_cpu_disabled = false;
                hdev->hard_reset_pending = false;

                if (hdev->reset_trigger_repeated &&
                                (hdev->prev_reset_trigger == HL_RESET_FW_FATAL_ERR)) {
                        /* if there are two back-to-back resets from the FW,
                         * ensure the driver puts the device in an unusable state
                         */
                        dev_crit(hdev->dev,
                                "Consecutive FW fatal errors received, stopping hard reset\n");
                        rc = -EIO;
                        goto out_err;
                }

                if (hdev->kernel_ctx) {
                        dev_crit(hdev->dev,
                                "kernel ctx was alive during hard reset, something is terribly wrong\n");
                        rc = -EBUSY;
                        goto out_err;
                }

                rc = hl_mmu_init(hdev);
                if (rc) {
                        dev_err(hdev->dev,
                                "Failed to initialize MMU S/W after hard reset\n");
                        goto out_err;
                }

                /* Allocate the kernel context */
                hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
                                                GFP_KERNEL);
                if (!hdev->kernel_ctx) {
                        rc = -ENOMEM;
                        hl_mmu_fini(hdev);
                        goto out_err;
                }

                hdev->compute_ctx = NULL;

                rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
                if (rc) {
                        dev_err(hdev->dev,
                                "failed to init kernel ctx in hard reset\n");
                        kfree(hdev->kernel_ctx);
                        hdev->kernel_ctx = NULL;
                        hl_mmu_fini(hdev);
                        goto out_err;
                }
        }

        /* Device is now enabled as part of the initialization requires
         * communication with the device firmware to get information that
         * is required for the initialization itself
         */
        hdev->disabled = false;

        rc = hdev->asic_funcs->hw_init(hdev);
        if (rc) {
                dev_err(hdev->dev,
                        "failed to initialize the H/W after reset\n");
                goto out_err;
        }

        /* If device is not idle fail the reset process */
        if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
                        HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
                dev_err(hdev->dev,
                        "device is not idle (mask 0x%llx_%llx) after reset\n",
                        idle_mask[1], idle_mask[0]);
                rc = -EIO;
                goto out_err;
        }

        /* Check that the communication with the device is working */
        rc = hdev->asic_funcs->test_queues(hdev);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to detect if device is alive after reset\n");
                goto out_err;
        }

        if (hard_reset) {
                rc = device_late_init(hdev);
                if (rc) {
                        dev_err(hdev->dev,
                                "Failed late init after hard reset\n");
                        goto out_err;
                }

                rc = hl_vm_init(hdev);
                if (rc) {
                        dev_err(hdev->dev,
                                "Failed to init memory module after hard reset\n");
                        goto out_err;
                }

                hl_set_max_power(hdev);
        } else {
                rc = hdev->asic_funcs->soft_reset_late_init(hdev);
                if (rc) {
                        dev_err(hdev->dev,
                                "Failed late init after soft reset\n");
                        goto out_err;
                }
        }

        atomic_set(&hdev->in_reset, 0);
        hdev->needs_reset = false;

        dev_notice(hdev->dev, "Successfully finished resetting the device\n");

        if (hard_reset) {
                hdev->hard_reset_cnt++;

                /* After reset is done, we are ready to receive events from
                 * the F/W. We can't do it before because we will ignore events
                 * and if those events are fatal, we won't know about it and
                 * the device will be operational although it shouldn't be
                 */
                hdev->asic_funcs->enable_events_from_fw(hdev);
        } else {
                hdev->soft_reset_cnt++;
        }

        return 0;

out_err:
        hdev->disabled = true;

        if (hard_reset) {
                dev_err(hdev->dev,
                        "Failed to reset! Device is NOT usable\n");
                hdev->hard_reset_cnt++;
        } else {
                dev_err(hdev->dev,
                        "Failed to do soft-reset, trying hard reset\n");
                hdev->soft_reset_cnt++;
                hard_reset = true;
                goto again;
        }

        atomic_set(&hdev->in_reset, 0);

        return rc;
}
1315
1316/*
1317 * hl_device_init - main initialization function for habanalabs device
1318 *
1319 * @hdev: pointer to habanalabs device structure
1320 *
1321 * Allocate an id for the device, do early initialization and then call the
1322 * ASIC specific initialization functions. Finally, create the cdev and the
1323 * Linux device to expose it to the user
1324 */
1325int hl_device_init(struct hl_device *hdev, struct class *hclass)
1326{
1327        int i, rc, cq_cnt, user_interrupt_cnt, cq_ready_cnt;
1328        char *name;
1329        bool add_cdev_sysfs_on_err = false;
1330
1331        name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
1332        if (!name) {
1333                rc = -ENOMEM;
1334                goto out_disabled;
1335        }
1336
1337        /* Initialize cdev and device structures */
1338        rc = device_init_cdev(hdev, hclass, hdev->id, &hl_ops, name,
1339                                &hdev->cdev, &hdev->dev);
1340
1341        kfree(name);
1342
1343        if (rc)
1344                goto out_disabled;
1345
1346        name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
1347        if (!name) {
1348                rc = -ENOMEM;
1349                goto free_dev;
1350        }
1351
1352        /* Initialize cdev and device structures for control device */
1353        rc = device_init_cdev(hdev, hclass, hdev->id_control, &hl_ctrl_ops,
1354                                name, &hdev->cdev_ctrl, &hdev->dev_ctrl);
1355
1356        kfree(name);
1357
1358        if (rc)
1359                goto free_dev;
1360
1361        /* Initialize ASIC function pointers and perform early init */
1362        rc = device_early_init(hdev);
1363        if (rc)
1364                goto free_dev_ctrl;
1365
1366        user_interrupt_cnt = hdev->asic_prop.user_interrupt_count;
1367
1368        if (user_interrupt_cnt) {
1369                hdev->user_interrupt = kcalloc(user_interrupt_cnt,
1370                                sizeof(*hdev->user_interrupt),
1371                                GFP_KERNEL);
1372
1373                if (!hdev->user_interrupt) {
1374                        rc = -ENOMEM;
1375                        goto early_fini;
1376                }
1377        }
1378
1379        /*
1380         * Start calling ASIC initialization. First S/W then H/W and finally
1381         * late init
1382         */
1383        rc = hdev->asic_funcs->sw_init(hdev);
1384        if (rc)
1385                goto user_interrupts_fini;
1386
1387
1388        /* initialize completion structure for multi CS wait */
1389        hl_multi_cs_completion_init(hdev);
1390
1391        /*
1392         * Initialize the H/W queues. Must be done before hw_init, because
1393         * there the addresses of the kernel queue are being written to the
1394         * registers of the device
1395         */
1396        rc = hl_hw_queues_create(hdev);
1397        if (rc) {
1398                dev_err(hdev->dev, "failed to initialize kernel queues\n");
1399                goto sw_fini;
1400        }
1401
1402        cq_cnt = hdev->asic_prop.completion_queues_count;
1403
1404        /*
1405         * Initialize the completion queues. Must be done before hw_init,
1406         * because there the addresses of the completion queues are being
1407         * passed as arguments to request_irq
1408         */
1409        if (cq_cnt) {
1410                hdev->completion_queue = kcalloc(cq_cnt,
1411                                sizeof(*hdev->completion_queue),
1412                                GFP_KERNEL);
1413
1414                if (!hdev->completion_queue) {
1415                        dev_err(hdev->dev,
1416                                "failed to allocate completion queues\n");
1417                        rc = -ENOMEM;
1418                        goto hw_queues_destroy;
1419                }
1420        }
1421
1422        for (i = 0, cq_ready_cnt = 0 ; i < cq_cnt ; i++, cq_ready_cnt++) {
1423                rc = hl_cq_init(hdev, &hdev->completion_queue[i],
1424                                hdev->asic_funcs->get_queue_id_for_cq(hdev, i));
1425                if (rc) {
1426                        dev_err(hdev->dev,
1427                                "failed to initialize completion queue\n");
1428                        goto cq_fini;
1429                }
1430                hdev->completion_queue[i].cq_idx = i;
1431        }
1432
1433        /*
1434         * Initialize the event queue. Must be done before hw_init,
1435         * because there the address of the event queue is being
1436         * passed as argument to request_irq
1437         */
1438        rc = hl_eq_init(hdev, &hdev->event_queue);
1439        if (rc) {
1440                dev_err(hdev->dev, "failed to initialize event queue\n");
1441                goto cq_fini;
1442        }
1443
1444        /* MMU S/W must be initialized before kernel context is created */
1445        rc = hl_mmu_init(hdev);
1446        if (rc) {
1447                dev_err(hdev->dev, "Failed to initialize MMU S/W structures\n");
1448                goto eq_fini;
1449        }
1450
1451        /* Allocate the kernel context */
1452        hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx), GFP_KERNEL);
1453        if (!hdev->kernel_ctx) {
1454                rc = -ENOMEM;
1455                goto mmu_fini;
1456        }
1457
1458        hdev->compute_ctx = NULL;
1459
1460        hdev->asic_funcs->state_dump_init(hdev);
1461
1462        hl_debugfs_add_device(hdev);
1463
1464        /* debugfs nodes are created in hl_ctx_init so it must be called after
1465         * hl_debugfs_add_device.
1466         */
1467        rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
1468        if (rc) {
1469                dev_err(hdev->dev, "failed to initialize kernel context\n");
1470                kfree(hdev->kernel_ctx);
1471                goto remove_device_from_debugfs;
1472        }
1473
1474        rc = hl_cb_pool_init(hdev);
1475        if (rc) {
1476                dev_err(hdev->dev, "failed to initialize CB pool\n");
1477                goto release_ctx;
1478        }
1479
1480        /*
1481         * From this point, override rc (=0) in case of an error to allow
1482         * debugging (by adding char devices and create sysfs nodes as part of
1483         * the error flow).
1484         */
1485        add_cdev_sysfs_on_err = true;
1486
1487        /* Device is now enabled as part of the initialization requires
1488         * communication with the device firmware to get information that
1489         * is required for the initialization itself
1490         */
1491        hdev->disabled = false;
1492
1493        rc = hdev->asic_funcs->hw_init(hdev);
1494        if (rc) {
1495                dev_err(hdev->dev, "failed to initialize the H/W\n");
1496                rc = 0;
1497                goto out_disabled;
1498        }
1499
1500        /* Check that the communication with the device is working */
1501        rc = hdev->asic_funcs->test_queues(hdev);
1502        if (rc) {
1503                dev_err(hdev->dev, "Failed to detect if device is alive\n");
1504                rc = 0;
1505                goto out_disabled;
1506        }
1507
1508        rc = device_late_init(hdev);
1509        if (rc) {
1510                dev_err(hdev->dev, "Failed late initialization\n");
1511                rc = 0;
1512                goto out_disabled;
1513        }
1514
1515        dev_info(hdev->dev, "Found %s device with %lluGB DRAM\n",
1516                hdev->asic_name,
1517                hdev->asic_prop.dram_size / SZ_1G);
1518
1519        rc = hl_vm_init(hdev);
1520        if (rc) {
1521                dev_err(hdev->dev, "Failed to initialize memory module\n");
1522                rc = 0;
1523                goto out_disabled;
1524        }
1525
1526        /*
1527         * Expose devices and sysfs nodes to user.
1528         * From here there is no need to add char devices and create sysfs nodes
1529         * in case of an error.
1530         */
1531        add_cdev_sysfs_on_err = false;
1532        rc = device_cdev_sysfs_add(hdev);
1533        if (rc) {
1534                dev_err(hdev->dev,
1535                        "Failed to add char devices and sysfs nodes\n");
1536                rc = 0;
1537                goto out_disabled;
1538        }
1539
1540        /* Need to call this again because the max power might change,
1541         * depending on card type for certain ASICs
1542         */
1543        hl_set_max_power(hdev);
1544
1545        /*
1546         * hl_hwmon_init() must be called after device_late_init(), because
1547         * only then do we get the information from the device about which
1548         * hwmon-related sensors it supports.
1549         * Furthermore, it must be done after adding the device to the system.
1550         */
1551        rc = hl_hwmon_init(hdev);
1552        if (rc) {
1553                dev_err(hdev->dev, "Failed to initialize hwmon\n");
1554                rc = 0;
1555                goto out_disabled;
1556        }
1557
1558        dev_notice(hdev->dev,
1559                "Successfully added device to habanalabs driver\n");
1560
1561        hdev->init_done = true;
1562
1563        /* After initialization is done, we are ready to receive events from
1564         * the F/W. We can't do it earlier because events would be ignored
1565         * and, if any of those events were fatal, we wouldn't know about it
1566         * and the device would appear operational although it shouldn't be
1567         */
1568        hdev->asic_funcs->enable_events_from_fw(hdev);
1569
1570        return 0;
1571
1572release_ctx:
1573        if (hl_ctx_put(hdev->kernel_ctx) != 1)
1574                dev_err(hdev->dev,
1575                        "kernel ctx is still alive on initialization failure\n");
1576remove_device_from_debugfs:
1577        hl_debugfs_remove_device(hdev);
1578mmu_fini:
1579        hl_mmu_fini(hdev);
1580eq_fini:
1581        hl_eq_fini(hdev, &hdev->event_queue);
1582cq_fini:
1583        for (i = 0 ; i < cq_ready_cnt ; i++)
1584                hl_cq_fini(hdev, &hdev->completion_queue[i]);
1585        kfree(hdev->completion_queue);
1586hw_queues_destroy:
1587        hl_hw_queues_destroy(hdev);
1588sw_fini:
1589        hdev->asic_funcs->sw_fini(hdev);
1590user_interrupts_fini:
1591        kfree(hdev->user_interrupt);
1592early_fini:
1593        device_early_fini(hdev);
1594free_dev_ctrl:
1595        put_device(hdev->dev_ctrl);
1596free_dev:
1597        put_device(hdev->dev);
1598out_disabled:
1599        hdev->disabled = true;
1600        if (add_cdev_sysfs_on_err)
1601                device_cdev_sysfs_add(hdev);
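        /* hdev->id enumerates both the compute and control char devices of
         * each card, hence id / 2 below recovers the user-visible card index
         */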
1602        if (hdev->pdev)
1603                dev_err(&hdev->pdev->dev,
1604                        "Failed to initialize hl%d. Device is NOT usable!\n",
1605                        hdev->id / 2);
1606        else
1607                pr_err("Failed to initialize hl%d. Device is NOT usable!\n",
1608                        hdev->id / 2);
1609
1610        return rc;
1611}
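
/*
 * Note on the error labels above: hl_device_init() uses the standard kernel
 * "goto unwind" idiom - each successful init step adds one teardown label,
 * and a failure at step N jumps to the label that undoes steps N-1..1 in
 * reverse order. A minimal generic sketch of the idiom (all step_*() and
 * undo_*() names below are hypothetical, for illustration only):
 */
#if 0	/* illustrative sketch, not compiled */
static int init_two_steps(struct hl_device *hdev)
{
	int rc;

	rc = step_a(hdev);	/* hypothetical first init step */
	if (rc)
		return rc;	/* nothing to unwind yet */

	rc = step_b(hdev);	/* hypothetical second init step */
	if (rc)
		goto undo_a;	/* unwind only what already succeeded */

	return 0;

undo_a:
	undo_step_a(hdev);
	return rc;
}
#endif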
1612
1613/*
1614 * hl_device_fini - main tear-down function for habanalabs device
1615 *
1616 * @hdev: pointer to habanalabs device structure
1617 *
1618 * Destroy the device, call ASIC fini functions and release the id
1619 */
1620void hl_device_fini(struct hl_device *hdev)
1621{
1622        ktime_t timeout;
1623        u64 reset_sec;
1624        int i, rc;
1625
1626        dev_info(hdev->dev, "Removing device\n");
1627
1628        hdev->device_fini_pending = 1;
1629        flush_delayed_work(&hdev->device_reset_work.reset_work);
1630
1631        if (hdev->pldm)
1632                reset_sec = HL_PLDM_HARD_RESET_MAX_TIMEOUT;
1633        else
1634                reset_sec = HL_HARD_RESET_MAX_TIMEOUT;
1635
1636        /*
1637         * This function is competing with the reset function, so try to
1638         * take the reset atomic, and if we are already in the middle of a
1639         * reset, wait until the reset function finishes. The reset function
1640         * is designed to always finish. However, in Gaudi, because of all
1641         * the network ports, a hard reset can take 10-30 seconds
1642         */
1643
1644        timeout = ktime_add_us(ktime_get(), reset_sec * 1000 * 1000);
1645        rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
1646        while (rc) {
1647                usleep_range(50, 200);
1648                rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
1649                if (ktime_compare(ktime_get(), timeout) > 0) {
1650                        dev_crit(hdev->dev,
1651                                "Failed to remove device because reset function did not finish\n");
1652                        return;
1653                }
1654        }
1655
1656        /* Disable PCI access from device F/W so it won't send us additional
1657         * interrupts. We disable MSI/MSI-X in the halt_engines function, and
1658         * we can't have the F/W sending us interrupts after that. We need to
1659         * disable the access here because once the device is marked disabled,
1660         * the message can't be sent. Also, in case of a heartbeat failure, the
1661         * device CPU is marked as disabled, so this message wouldn't be sent
1662         */
1663        hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_DISABLE_PCI_ACCESS);
1664
1665        /* Mark device as disabled */
1666        hdev->disabled = true;
1667
1668        take_release_locks(hdev);
1669
1670        hdev->hard_reset_pending = true;
1671
1672        hl_hwmon_fini(hdev);
1673
1674        cleanup_resources(hdev, true, false);
1675
1676        /* Kill user processes here, after CS rollback. This is because a
1677         * process can't really exit until all of its CSs are done, which is
1678         * exactly what CS rollback takes care of
1679         */
1680        dev_info(hdev->dev,
1681                "Waiting for all processes to exit (timeout of %u seconds)\n",
1682                HL_PENDING_RESET_LONG_SEC);
1683
1684        rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC);
1685        if (rc) {
1686                dev_crit(hdev->dev, "Failed to kill all open processes\n");
1687                device_disable_open_processes(hdev);
1688        }
1689
1690        hl_cb_pool_fini(hdev);
1691
1692        /* Reset the H/W. It will be in idle state after this returns */
1693        hdev->asic_funcs->hw_fini(hdev, true, false);
1694
1695        hdev->fw_loader.linux_loaded = false;
1696
1697        /* Release kernel context */
1698        if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
1699                dev_err(hdev->dev, "kernel ctx is still alive\n");
1700
1701        hl_debugfs_remove_device(hdev);
1702
1703        hl_vm_fini(hdev);
1704
1705        hl_mmu_fini(hdev);
1706
1707        hl_eq_fini(hdev, &hdev->event_queue);
1708
1709        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
1710                hl_cq_fini(hdev, &hdev->completion_queue[i]);
1711        kfree(hdev->completion_queue);
1712        kfree(hdev->user_interrupt);
1713
1714        hl_hw_queues_destroy(hdev);
1715
1716        /* Call ASIC S/W finalize function */
1717        hdev->asic_funcs->sw_fini(hdev);
1718
1719        device_early_fini(hdev);
1720
1721        /* Hide devices and sysfs nodes from user */
1722        device_cdev_sysfs_del(hdev);
1723
1724        pr_info("removed device successfully\n");
1725}
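
/*
 * Note on the deadline loop in hl_device_fini() above: it is an instance of
 * a common polling idiom - try to win an atomic flag with cmpxchg, back off
 * briefly, and give up once a ktime deadline passes. A minimal stand-alone
 * sketch of the same pattern (the helper name is hypothetical, not part of
 * this driver):
 */
#if 0	/* illustrative sketch, not compiled */
static int try_acquire_flag(atomic_t *flag, u64 timeout_sec)
{
	ktime_t deadline = ktime_add_us(ktime_get(),
					timeout_sec * USEC_PER_SEC);

	/* a 0 -> 1 transition means we now own the flag */
	while (atomic_cmpxchg(flag, 0, 1)) {
		usleep_range(50, 200);
		if (ktime_compare(ktime_get(), deadline) > 0)
			return -ETIMEDOUT;
	}

	return 0;
}
#endif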
1726
1727/*
1728 * MMIO register access helper functions.
1729 */
1730
1731/*
1732 * hl_rreg - Read an MMIO register
1733 *
1734 * @hdev: pointer to habanalabs device structure
1735 * @reg: MMIO register offset (in bytes)
1736 *
1737 * Returns the value of the MMIO register we are asked to read
1738 *
1739 */
1740inline u32 hl_rreg(struct hl_device *hdev, u32 reg)
1741{
1742        return readl(hdev->rmmio + reg);
1743}
1744
1745/*
1746 * hl_wreg - Write to an MMIO register
1747 *
1748 * @hdev: pointer to habanalabs device structure
1749 * @reg: MMIO register offset (in bytes)
1750 * @val: 32-bit value
1751 *
1752 * Writes the 32-bit value into the MMIO register
1753 *
1754 */
1755inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
1756{
1757        writel(val, hdev->rmmio + reg);
1758}
1759
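/*
 * hl_rreg() and hl_wreg() are typically composed into read-modify-write
 * sequences when only part of a register must change. A minimal sketch of
 * such a helper (the name hl_rmw_reg() is hypothetical, for illustration
 * only):
 */
#if 0	/* illustrative sketch, not compiled */
static inline void hl_rmw_reg(struct hl_device *hdev, u32 reg, u32 mask,
				u32 val)
{
	u32 tmp = hl_rreg(hdev, reg);

	tmp &= ~mask;		/* clear the bits covered by the mask */
	tmp |= (val & mask);	/* set the new value in that field */
	hl_wreg(hdev, reg, tmp);
}
#endif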