linux/drivers/misc/habanalabs/command_submission.c

// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/uaccess.h>
#include <linux/slab.h>

static void job_wq_completion(struct work_struct *work);
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
                struct hl_ctx *ctx, u64 timeout_us, u64 seq);
static void cs_do_release(struct kref *ref);

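/*
 * Every command submission is backed by a struct hl_dma_fence. The dma_fence
 * callbacks below are the minimal set the driver needs: the fence is named
 * after the driver and the device, signaling is always considered enabled,
 * and the fence memory is freed via RCU when the last reference is dropped.
 */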
static const char *hl_fence_get_driver_name(struct dma_fence *fence)
{
        return "HabanaLabs";
}

static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
{
        struct hl_dma_fence *hl_fence =
                container_of(fence, struct hl_dma_fence, base_fence);

        return dev_name(hl_fence->hdev->dev);
}

static bool hl_fence_enable_signaling(struct dma_fence *fence)
{
        return true;
}

static void hl_fence_release(struct dma_fence *fence)
{
        struct hl_dma_fence *hl_fence =
                container_of(fence, struct hl_dma_fence, base_fence);

        kfree_rcu(hl_fence, base_fence.rcu);
}

static const struct dma_fence_ops hl_fence_ops = {
        .get_driver_name = hl_fence_get_driver_name,
        .get_timeline_name = hl_fence_get_timeline_name,
        .enable_signaling = hl_fence_enable_signaling,
        .wait = dma_fence_default_wait,
        .release = hl_fence_release
};

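/*
 * A CS is reference counted: the submitting thread holds a reference for the
 * duration of the ioctl, and every job that runs on an external queue takes
 * an additional one that is dropped when the job is freed on completion.
 * When the count reaches zero, cs_do_release() signals the CS fence and
 * frees the CS.
 */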
static void cs_get(struct hl_cs *cs)
{
        kref_get(&cs->refcount);
}

static int cs_get_unless_zero(struct hl_cs *cs)
{
        return kref_get_unless_zero(&cs->refcount);
}

static void cs_put(struct hl_cs *cs)
{
        kref_put(&cs->refcount, cs_do_release);
}

/*
 * cs_parser - parse the user command submission
 *
 * @hpriv : pointer to the private data of the fd
 * @job   : pointer to the job that holds the command submission info
 *
 * The function parses the user's command submission. It calls the
 * ASIC-specific parser, which returns a list of memory blocks to send
 * to the device as different command buffers.
 */
static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
{
        struct hl_device *hdev = hpriv->hdev;
        struct hl_cs_parser parser;
        int rc;

        parser.ctx_id = job->cs->ctx->asid;
        parser.cs_sequence = job->cs->sequence;
        parser.job_id = job->id;

        parser.hw_queue_id = job->hw_queue_id;
        parser.job_userptr_list = &job->userptr_list;
        parser.patched_cb = NULL;
        parser.user_cb = job->user_cb;
        parser.user_cb_size = job->user_cb_size;
        parser.ext_queue = job->ext_queue;
        job->patched_cb = NULL;

        rc = hdev->asic_funcs->cs_parser(hdev, &parser);
        if (job->ext_queue) {
                if (!rc) {
                        job->patched_cb = parser.patched_cb;
                        job->job_cb_size = parser.patched_cb_size;

                        spin_lock(&job->patched_cb->lock);
                        job->patched_cb->cs_cnt++;
                        spin_unlock(&job->patched_cb->lock);
                }

                /*
                 * Whether the parsing worked or not, we don't need the
                 * original CB anymore because it was already parsed and
                 * won't be accessed again for this CS
                 */
                spin_lock(&job->user_cb->lock);
                job->user_cb->cs_cnt--;
                spin_unlock(&job->user_cb->lock);
                hl_cb_put(job->user_cb);
                job->user_cb = NULL;
        }

        return rc;
}

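/*
 * free_job - release a single job of a command submission
 *
 * For external-queue jobs, the userptr list and the reference on the patched
 * CB taken in cs_parser() are released. The job is then removed from the CS
 * job list under the job lock, removed from debugfs and freed. External-queue
 * jobs also drop the CS reference that was taken when they were queued.
 */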
static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
{
        struct hl_cs *cs = job->cs;

        if (job->ext_queue) {
                hl_userptr_delete_list(hdev, &job->userptr_list);

                /*
                 * We might arrive here from a rollback, where the patched
                 * CB was never created, so check that it is not NULL
                 */
                if (job->patched_cb) {
                        spin_lock(&job->patched_cb->lock);
                        job->patched_cb->cs_cnt--;
                        spin_unlock(&job->patched_cb->lock);

                        hl_cb_put(job->patched_cb);
                }
        }

        /*
         * This is the only place where there can be multiple threads
         * modifying the list at the same time
         */
        spin_lock(&cs->job_lock);
        list_del(&job->cs_node);
        spin_unlock(&cs->job_lock);

        hl_debugfs_remove_job(hdev, job);

        if (job->ext_queue)
                cs_put(cs);

        kfree(job);
}

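/*
 * cs_do_release - kref release handler of a command submission
 *
 * Frees any jobs that are still on the CS job list (internal-queue jobs
 * don't generate completions), updates the CI of the internal queues,
 * removes the CS from the H/W queues mirror list and hands the TDR over to
 * the next pending CS. Finally, the CS fence is signaled (with -ETIMEDOUT or
 * -EIO if the CS timed out or was aborted) and the CS is freed.
 */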
static void cs_do_release(struct kref *ref)
{
        struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
        struct hl_device *hdev = cs->ctx->hdev;
        struct hl_cs_job *job, *tmp;

        cs->completed = true;

        /*
         * Although reaching here means that all external jobs have finished
         * (each of them took a refcount on the CS), we still need to go over
         * the internal jobs and free them. Otherwise, we will have leaked
         * memory and, what's worse, the CS object (and potentially the CTX
         * object) could be released while a JOB still holds a pointer to
         * them (but no reference).
         */
        list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
                free_job(hdev, job);

        /* We also need to update CI for internal queues */
        if (cs->submitted) {
                int cs_cnt = atomic_dec_return(&hdev->cs_active_cnt);

                WARN_ONCE((cs_cnt < 0),
                        "hl%d: error in CS active cnt %d\n",
                        hdev->id, cs_cnt);

                hl_int_hw_queue_update_ci(cs);

                spin_lock(&hdev->hw_queues_mirror_lock);
                /* remove CS from hw_queues mirror list */
                list_del_init(&cs->mirror_node);
                spin_unlock(&hdev->hw_queues_mirror_lock);

                /*
                 * Don't cancel the TDR if this CS timed out, because we
                 * might be running from the TDR context itself
                 */
                if ((!cs->timedout) &&
                        (hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) {
                        struct hl_cs *next;

                        if (cs->tdr_active)
                                cancel_delayed_work_sync(&cs->work_tdr);

                        spin_lock(&hdev->hw_queues_mirror_lock);

                        /* queue TDR for next CS */
                        next = list_first_entry_or_null(
                                        &hdev->hw_queues_mirror_list,
                                        struct hl_cs, mirror_node);

                        if ((next) && (!next->tdr_active)) {
                                next->tdr_active = true;
                                schedule_delayed_work(&next->work_tdr,
                                                        hdev->timeout_jiffies);
                        }

                        spin_unlock(&hdev->hw_queues_mirror_lock);
                }
        }

        /*
         * Must be called before hl_ctx_put because inside we use ctx to get
         * the device
         */
        hl_debugfs_remove_cs(cs);

        hl_ctx_put(cs->ctx);

        if (cs->timedout)
                dma_fence_set_error(cs->fence, -ETIMEDOUT);
        else if (cs->aborted)
                dma_fence_set_error(cs->fence, -EIO);

        dma_fence_signal(cs->fence);
        dma_fence_put(cs->fence);

        kfree(cs);
}

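/*
 * cs_timedout - TDR (timeout detection and recovery) handler of a CS
 *
 * Runs from the delayed work_tdr of the CS. If the CS is still in flight,
 * it is marked as timed out so that cs_do_release() won't try to cancel the
 * work from under us, and the device is reset if reset_on_lockup is set.
 */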
static void cs_timedout(struct work_struct *work)
{
        struct hl_device *hdev;
        int ctx_asid, rc;
        struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work);

        rc = cs_get_unless_zero(cs);
        if (!rc)
                return;

        if ((!cs->submitted) || (cs->completed)) {
                cs_put(cs);
                return;
        }

        /* Mark the CS as timed out so we won't try to cancel its TDR */
        cs->timedout = true;

        hdev = cs->ctx->hdev;
        ctx_asid = cs->ctx->asid;

        /* TODO: add information about last signaled seq and last emitted seq */
        dev_err(hdev->dev, "User %d command submission %llu got stuck!\n",
                ctx_asid, cs->sequence);

        cs_put(cs);

        if (hdev->reset_on_lockup)
                hl_device_reset(hdev, false, false);
}

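/*
 * allocate_cs - allocate a new command submission and its fence
 *
 * The new CS receives the next sequence number of the context and its fence
 * is stored in the context's cs_pending array. If the slot for that sequence
 * number is still occupied by an unsignaled fence, i.e. HL_MAX_PENDING_CS
 * submissions are already in flight, -EAGAIN is returned so the caller can
 * retry later.
 */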
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
                        struct hl_cs **cs_new)
{
        struct hl_dma_fence *fence;
        struct dma_fence *other = NULL;
        struct hl_cs *cs;
        int rc;

        cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
        if (!cs)
                return -ENOMEM;

        cs->ctx = ctx;
        cs->submitted = false;
        cs->completed = false;
        INIT_LIST_HEAD(&cs->job_list);
        INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
        kref_init(&cs->refcount);
        spin_lock_init(&cs->job_lock);

        fence = kmalloc(sizeof(*fence), GFP_ATOMIC);
        if (!fence) {
                rc = -ENOMEM;
                goto free_cs;
        }

        fence->hdev = hdev;
        spin_lock_init(&fence->lock);
        cs->fence = &fence->base_fence;

        spin_lock(&ctx->cs_lock);

        fence->cs_seq = ctx->cs_sequence;
        other = ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)];
        if ((other) && (!dma_fence_is_signaled(other))) {
                spin_unlock(&ctx->cs_lock);
                rc = -EAGAIN;
                goto free_fence;
        }

        dma_fence_init(&fence->base_fence, &hl_fence_ops, &fence->lock,
                        ctx->asid, ctx->cs_sequence);

        cs->sequence = fence->cs_seq;

        ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)] =
                                                        &fence->base_fence;
        ctx->cs_sequence++;

        dma_fence_get(&fence->base_fence);

        dma_fence_put(other);

        spin_unlock(&ctx->cs_lock);

        *cs_new = cs;

        return 0;

free_fence:
        kfree(fence);
free_cs:
        kfree(cs);
        return rc;
}

static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
{
        struct hl_cs_job *job, *tmp;

        list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
                free_job(hdev, job);
}

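/*
 * hl_cs_rollback_all - abort all command submissions that are still in flight
 *
 * Presumably invoked from the device reset flow, after the completion
 * workqueue has been flushed. Every CS left on the H/W queues mirror list is
 * marked as aborted and its jobs are freed, which eventually signals the CS
 * fence with -EIO.
 */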
void hl_cs_rollback_all(struct hl_device *hdev)
{
        struct hl_cs *cs, *tmp;

        /* flush all completions */
        flush_workqueue(hdev->cq_wq);

        /* Make sure we don't have leftovers in the H/W queues mirror list */
        list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
                                mirror_node) {
                cs_get(cs);
                cs->aborted = true;
                dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
                                        cs->ctx->asid, cs->sequence);
                cs_rollback(hdev, cs);
                cs_put(cs);
        }
}

static void job_wq_completion(struct work_struct *work)
{
        struct hl_cs_job *job = container_of(work, struct hl_cs_job,
                                                finish_work);
        struct hl_cs *cs = job->cs;
        struct hl_device *hdev = cs->ctx->hdev;

        /* job is no longer needed */
        free_job(hdev, job);
}

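/*
 * validate_queue_index - validate a CS chunk and fetch its command buffer
 *
 * Rejects invalid or KMD-only queue indices. For internal queues there is no
 * CB object, so the raw CB address from the chunk is returned and *ext_queue
 * is cleared. For external queues, the CB handle is looked up, its size is
 * validated against the chunk and its cs_cnt is incremented so it can't be
 * destroyed while the CS is using it.
 */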
static struct hl_cb *validate_queue_index(struct hl_device *hdev,
                                        struct hl_cb_mgr *cb_mgr,
                                        struct hl_cs_chunk *chunk,
                                        bool *ext_queue)
{
        struct asic_fixed_properties *asic = &hdev->asic_prop;
        struct hw_queue_properties *hw_queue_prop;
        u32 cb_handle;
        struct hl_cb *cb;

        /* Assume external queue */
        *ext_queue = true;

        /* Validate the index before using it to access hw_queues_props */
        if (chunk->queue_index >= HL_MAX_QUEUES) {
                dev_err(hdev->dev, "Queue index %d is invalid\n",
                        chunk->queue_index);
                return NULL;
        }

        hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];

        if (hw_queue_prop->type == QUEUE_TYPE_NA) {
                dev_err(hdev->dev, "Queue index %d is invalid\n",
                        chunk->queue_index);
                return NULL;
        }

        if (hw_queue_prop->kmd_only) {
                dev_err(hdev->dev, "Queue index %d is restricted for KMD\n",
                        chunk->queue_index);
                return NULL;
        } else if (hw_queue_prop->type == QUEUE_TYPE_INT) {
                *ext_queue = false;
                return (struct hl_cb *) (uintptr_t) chunk->cb_handle;
        }

        /* Retrieve CB object */
        cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);

        cb = hl_cb_get(hdev, cb_mgr, cb_handle);
        if (!cb) {
                dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
                return NULL;
        }

        if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
                dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
                goto release_cb;
        }

        spin_lock(&cb->lock);
        cb->cs_cnt++;
        spin_unlock(&cb->lock);

        return cb;

release_cb:
        hl_cb_put(cb);
        return NULL;
}

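/*
 * hl_cs_allocate_job - allocate a new, empty job object
 *
 * Jobs destined for external queues also get a userptr list for the parser
 * to track pinned user memory, and a work item that frees the job once its
 * completion arrives.
 */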
struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, bool ext_queue)
{
        struct hl_cs_job *job;

        job = kzalloc(sizeof(*job), GFP_ATOMIC);
        if (!job)
                return NULL;

        job->ext_queue = ext_queue;

        if (job->ext_queue) {
                INIT_LIST_HEAD(&job->userptr_list);
                INIT_WORK(&job->finish_work, job_wq_completion);
        }

        return job;
}

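/*
 * _hl_cs_ioctl - build and submit a single command submission
 *
 * Copies the chunk array from userspace, allocates a CS and turns every
 * chunk into a job: the queue and CB are validated and the ASIC parser is
 * run on external-queue jobs. At least one job must target an external
 * queue, because only external queues generate completions. On success the
 * CS is scheduled on the H/W queues and *cs_seq holds the sequence number
 * the user can wait on.
 */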
static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
                        u32 num_chunks, u64 *cs_seq)
{
        struct hl_device *hdev = hpriv->hdev;
        struct hl_cs_chunk *cs_chunk_array;
        struct hl_cs_job *job;
        struct hl_cs *cs;
        struct hl_cb *cb;
        bool ext_queue_present = false;
        u32 size_to_copy;
        int rc, i, parse_cnt;

        *cs_seq = ULLONG_MAX;

        if (num_chunks > HL_MAX_JOBS_PER_CS) {
                dev_err(hdev->dev,
                        "Number of chunks can NOT be larger than %d\n",
                        HL_MAX_JOBS_PER_CS);
                rc = -EINVAL;
                goto out;
        }

        cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
                                        GFP_ATOMIC);
        if (!cs_chunk_array) {
                rc = -ENOMEM;
                goto out;
        }

        size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
        if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
                dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
                rc = -EFAULT;
                goto free_cs_chunk_array;
        }

        /* increment refcnt for context */
        hl_ctx_get(hdev, hpriv->ctx);

        rc = allocate_cs(hdev, hpriv->ctx, &cs);
        if (rc) {
                hl_ctx_put(hpriv->ctx);
                goto free_cs_chunk_array;
        }

        *cs_seq = cs->sequence;

        hl_debugfs_add_cs(cs);

        /* Validate ALL the CS chunks before submitting the CS */
        for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
                struct hl_cs_chunk *chunk = &cs_chunk_array[i];
                bool ext_queue;

                cb = validate_queue_index(hdev, &hpriv->cb_mgr, chunk,
                                        &ext_queue);
                if (ext_queue) {
                        ext_queue_present = true;
                        if (!cb) {
                                rc = -EINVAL;
                                goto free_cs_object;
                        }
                }

                job = hl_cs_allocate_job(hdev, ext_queue);
                if (!job) {
                        dev_err(hdev->dev, "Failed to allocate a new job\n");
                        rc = -ENOMEM;
                        if (ext_queue)
                                goto release_cb;
                        else
                                goto free_cs_object;
                }

                job->id = i + 1;
                job->cs = cs;
                job->user_cb = cb;
                job->user_cb_size = chunk->cb_size;
                if (job->ext_queue)
                        job->job_cb_size = cb->size;
                else
                        job->job_cb_size = chunk->cb_size;
                job->hw_queue_id = chunk->queue_index;

                cs->jobs_in_queue_cnt[job->hw_queue_id]++;

                list_add_tail(&job->cs_node, &cs->job_list);

                /*
                 * Increment the CS reference. When the CS reference reaches
                 * 0, the CS is done and can be signaled to the user and all
                 * its resources freed. Only increment for JOBs on external
                 * queues, because only those JOBs generate a completion.
                 */
                if (job->ext_queue)
                        cs_get(cs);

                hl_debugfs_add_job(hdev, job);

                rc = cs_parser(hpriv, job);
                if (rc) {
                        dev_err(hdev->dev,
                                "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
                                cs->ctx->asid, cs->sequence, job->id, rc);
                        goto free_cs_object;
                }
        }

        if (!ext_queue_present) {
                dev_err(hdev->dev,
                        "Reject CS %d.%llu because it has no external queue jobs\n",
                        cs->ctx->asid, cs->sequence);
                rc = -EINVAL;
                goto free_cs_object;
        }

        rc = hl_hw_queue_schedule_cs(cs);
        if (rc) {
                dev_err(hdev->dev,
                        "Failed to submit CS %d.%llu to H/W queues, error %d\n",
                        cs->ctx->asid, cs->sequence, rc);
                goto free_cs_object;
        }

        rc = HL_CS_STATUS_SUCCESS;
        goto put_cs;

release_cb:
        spin_lock(&cb->lock);
        cb->cs_cnt--;
        spin_unlock(&cb->lock);
        hl_cb_put(cb);
free_cs_object:
        cs_rollback(hdev, cs);
        *cs_seq = ULLONG_MAX;
        /* The path below is both for good and erroneous exits */
put_cs:
        /* We finished with the CS in this function, so put the ref */
        cs_put(cs);
free_cs_chunk_array:
        kfree(cs_chunk_array);
out:
        return rc;
}

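/*
 * hl_cs_ioctl - handler of the command submission ioctl
 *
 * @hpriv : pointer to the private data of the fd
 * @data  : pointer to the ioctl arguments (union hl_cs_args)
 *
 * For the first CS of a context (or when HL_CS_FLAGS_FORCE_RESTORE is set),
 * a context-switch is performed and the "restore" chunks are submitted and
 * waited upon before the "execute" chunks are submitted. Other threads of
 * the same context poll thread_ctx_switch_wait_token until that phase is
 * done.
 *
 * A rough userspace sketch of an execute-only submission. The field names
 * below are the ones used in this file; the ioctl numbers (HL_IOCTL_CS,
 * HL_IOCTL_WAIT_CS) are assumed to come from <uapi/misc/habanalabs.h>:
 *
 *      struct hl_cs_chunk chunk = {
 *              .cb_handle = cb_handle,         // handle of a previously created CB
 *              .queue_index = queue_index,     // external queue to run on
 *              .cb_size = cb_size,             // bytes actually used in the CB
 *      };
 *      union hl_cs_args cs_args = {
 *              .in.chunks_execute = (__u64) (uintptr_t) &chunk,
 *              .in.num_chunks_execute = 1,
 *      };
 *
 *      int rc = ioctl(fd, HL_IOCTL_CS, &cs_args);
 *      // on success, cs_args.out.seq is the sequence number to wait on
 *      // with HL_IOCTL_WAIT_CS
 */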
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
        struct hl_device *hdev = hpriv->hdev;
        union hl_cs_args *args = data;
        struct hl_ctx *ctx = hpriv->ctx;
        void __user *chunks;
        u32 num_chunks;
        u64 cs_seq = ULLONG_MAX;
        int rc, do_ctx_switch;
        bool need_soft_reset = false;

        if (hl_device_disabled_or_in_reset(hdev)) {
                dev_warn_ratelimited(hdev->dev,
                        "Device is %s. Can't submit new CS\n",
                        atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
                rc = -EBUSY;
                goto out;
        }

        do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

        if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
                long ret;

                chunks = (void __user *)(uintptr_t)args->in.chunks_restore;
                num_chunks = args->in.num_chunks_restore;

                mutex_lock(&hpriv->restore_phase_mutex);

                if (do_ctx_switch) {
                        rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
                        if (rc) {
                                dev_err_ratelimited(hdev->dev,
                                        "Failed to switch to context %d, rejecting CS! %d\n",
                                        ctx->asid, rc);
                                /*
                                 * If we timed out, or if the device is not
                                 * IDLE while we want to do a context switch
                                 * (-EBUSY), we need to soft-reset because
                                 * QMAN is probably stuck. However, we can't
                                 * call the reset here directly because of a
                                 * potential deadlock, so we do it at the very
                                 * end of this function
                                 */
                                if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
                                        need_soft_reset = true;
                                mutex_unlock(&hpriv->restore_phase_mutex);
                                goto out;
                        }
                }

                hdev->asic_funcs->restore_phase_topology(hdev);

                if (num_chunks == 0) {
                        dev_dbg(hdev->dev,
                        "Need to run restore phase but restore CS is empty\n");
                        rc = 0;
                } else {
                        rc = _hl_cs_ioctl(hpriv, chunks, num_chunks,
                                                &cs_seq);
                }

                mutex_unlock(&hpriv->restore_phase_mutex);

                if (rc) {
                        dev_err(hdev->dev,
                                "Failed to submit restore CS for context %d (%d)\n",
                                ctx->asid, rc);
                        goto out;
                }

                /* Need to wait for restore completion before execution phase */
                if (num_chunks > 0) {
                        ret = _hl_cs_wait_ioctl(hdev, ctx,
                                        jiffies_to_usecs(hdev->timeout_jiffies),
                                        cs_seq);
                        if (ret <= 0) {
                                dev_err(hdev->dev,
                                        "Restore CS for context %d failed to complete %ld\n",
                                        ctx->asid, ret);
                                rc = -ENOEXEC;
                                goto out;
                        }
                }

                ctx->thread_ctx_switch_wait_token = 1;
        } else if (!ctx->thread_ctx_switch_wait_token) {
                u32 tmp;

                rc = hl_poll_timeout_memory(hdev,
                        &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
                        100, jiffies_to_usecs(hdev->timeout_jiffies), false);

                if (rc == -ETIMEDOUT) {
                        dev_err(hdev->dev,
                                "context switch phase timeout (%d)\n", tmp);
                        goto out;
                }
        }

        chunks = (void __user *)(uintptr_t)args->in.chunks_execute;
        num_chunks = args->in.num_chunks_execute;

        if (num_chunks == 0) {
                dev_err(hdev->dev,
                        "Got execute CS with 0 chunks, context %d\n",
                        ctx->asid);
                rc = -EINVAL;
                goto out;
        }

        rc = _hl_cs_ioctl(hpriv, chunks, num_chunks, &cs_seq);

out:
        if (rc != -EAGAIN) {
                memset(args, 0, sizeof(*args));
                args->out.status = rc;
                args->out.seq = cs_seq;
        }

        if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset))
                hl_device_reset(hdev, false, false);

        return rc;
}

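/*
 * _hl_cs_wait_ioctl - wait for a command submission to complete
 *
 * Looks up the fence of the given sequence number and waits on it for up to
 * timeout_us microseconds. Returns a negative error code on failure, 0 if
 * the timeout expired and a positive value if the CS completed. A NULL fence
 * is treated as a CS that has already completed and had its fence slot
 * recycled, so 1 is returned immediately.
 */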
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
                struct hl_ctx *ctx, u64 timeout_us, u64 seq)
{
        struct dma_fence *fence;
        unsigned long timeout;
        long rc;

        if (timeout_us == MAX_SCHEDULE_TIMEOUT)
                timeout = timeout_us;
        else
                timeout = usecs_to_jiffies(timeout_us);

        hl_ctx_get(hdev, ctx);

        fence = hl_ctx_get_fence(ctx, seq);
        if (IS_ERR(fence)) {
                rc = PTR_ERR(fence);
        } else if (fence) {
                rc = dma_fence_wait_timeout(fence, true, timeout);
                if (fence->error == -ETIMEDOUT)
                        rc = -ETIMEDOUT;
                else if (fence->error == -EIO)
                        rc = -EIO;
                dma_fence_put(fence);
        } else {
                rc = 1;
        }

        hl_ctx_put(ctx);

        return rc;
}

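/*
 * hl_cs_wait_ioctl - ioctl handler for waiting on a command submission
 *
 * Translates the result of _hl_cs_wait_ioctl() into a status for userspace:
 * BUSY if the timeout expired, COMPLETED on success, and INTERRUPTED /
 * TIMEDOUT / ABORTED for -ERESTARTSYS / -ETIMEDOUT / -EIO respectively
 * (-ERESTARTSYS is also converted to -EINTR for the return value).
 */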
int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
        struct hl_device *hdev = hpriv->hdev;
        union hl_wait_cs_args *args = data;
        u64 seq = args->in.seq;
        long rc;

        rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq);

        memset(args, 0, sizeof(*args));

        if (rc < 0) {
                dev_err(hdev->dev, "Error %ld on waiting for CS handle %llu\n",
                        rc, seq);
                if (rc == -ERESTARTSYS) {
                        args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
                        rc = -EINTR;
                } else if (rc == -ETIMEDOUT) {
                        args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
                } else if (rc == -EIO) {
                        args->out.status = HL_WAIT_CS_STATUS_ABORTED;
                }
                return rc;
        }

        if (rc == 0)
                args->out.status = HL_WAIT_CS_STATUS_BUSY;
        else
                args->out.status = HL_WAIT_CS_STATUS_COMPLETED;

        return 0;
}