linux/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 *
  22 */
  23
  24#include <linux/ratelimit.h>
  25#include <linux/printk.h>
  26#include <linux/slab.h>
  27#include <linux/list.h>
  28#include <linux/types.h>
  29#include <linux/bitops.h>
  30#include <linux/sched.h>
  31#include "kfd_priv.h"
  32#include "kfd_device_queue_manager.h"
  33#include "kfd_mqd_manager.h"
  34#include "cik_regs.h"
  35#include "kfd_kernel_queue.h"
  36#include "amdgpu_amdkfd.h"
  37
  38/* Size of the per-pipe EOP queue */
  39#define CIK_HPD_EOP_BYTES_LOG2 11
  40#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)
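     /* With CIK_HPD_EOP_BYTES_LOG2 == 11, each pipe gets a 2 KiB (2048-byte)
      * EOP buffer.
      */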
  41
  42static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
  43                                  u32 pasid, unsigned int vmid);
  44
  45static int execute_queues_cpsch(struct device_queue_manager *dqm,
  46                                enum kfd_unmap_queues_filter filter,
  47                                uint32_t filter_param);
  48static int unmap_queues_cpsch(struct device_queue_manager *dqm,
  49                                enum kfd_unmap_queues_filter filter,
  50                                uint32_t filter_param);
  51
  52static int map_queues_cpsch(struct device_queue_manager *dqm);
  53
  54static void deallocate_sdma_queue(struct device_queue_manager *dqm,
  55                                struct queue *q);
  56
  57static inline void deallocate_hqd(struct device_queue_manager *dqm,
  58                                struct queue *q);
  59static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
  60static int allocate_sdma_queue(struct device_queue_manager *dqm,
  61                                struct queue *q);
  62static void kfd_process_hw_exception(struct work_struct *work);
  63
  64static inline
  65enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
  66{
  67        if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
  68                return KFD_MQD_TYPE_SDMA;
  69        return KFD_MQD_TYPE_CP;
  70}
  71
  72static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
  73{
  74        int i;
  75        int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
  76                + pipe) * dqm->dev->shared_resources.num_queue_per_pipe;
  77
  78        /* queue is available for KFD usage if bit is 1 */
  79        for (i = 0; i <  dqm->dev->shared_resources.num_queue_per_pipe; ++i)
  80                if (test_bit(pipe_offset + i,
  81                              dqm->dev->shared_resources.cp_queue_bitmap))
  82                        return true;
  83        return false;
  84}
  85
  86unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
  87{
  88        return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap,
  89                                KGD_MAX_QUEUES);
  90}
  91
  92unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
  93{
  94        return dqm->dev->shared_resources.num_queue_per_pipe;
  95}
  96
  97unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
  98{
  99        return dqm->dev->shared_resources.num_pipe_per_mec;
 100}
 101
 102static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
 103{
 104        return dqm->dev->device_info->num_sdma_engines;
 105}
 106
 107static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
 108{
 109        return dqm->dev->device_info->num_xgmi_sdma_engines;
 110}
 111
 112static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
 113{
 114        return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
 115}
 116
 117unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
 118{
 119        return dqm->dev->device_info->num_sdma_engines
 120                        * dqm->dev->device_info->num_sdma_queues_per_engine;
 121}
 122
 123unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
 124{
 125        return dqm->dev->device_info->num_xgmi_sdma_engines
 126                        * dqm->dev->device_info->num_sdma_queues_per_engine;
 127}
 128
 129void program_sh_mem_settings(struct device_queue_manager *dqm,
 130                                        struct qcm_process_device *qpd)
 131{
 132        return dqm->dev->kfd2kgd->program_sh_mem_settings(
 133                                                dqm->dev->kgd, qpd->vmid,
 134                                                qpd->sh_mem_config,
 135                                                qpd->sh_mem_ape1_base,
 136                                                qpd->sh_mem_ape1_limit,
 137                                                qpd->sh_mem_bases);
 138}
 139
 140static void increment_queue_count(struct device_queue_manager *dqm,
 141                        enum kfd_queue_type type)
 142{
 143        dqm->active_queue_count++;
 144        if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
 145                dqm->active_cp_queue_count++;
 146}
 147
 148static void decrement_queue_count(struct device_queue_manager *dqm,
 149                        enum kfd_queue_type type)
 150{
 151        dqm->active_queue_count--;
 152        if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
 153                dqm->active_cp_queue_count--;
 154}
 155
 156static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
 157{
 158        struct kfd_dev *dev = qpd->dqm->dev;
 159
 160        if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
 161                /* On pre-SOC15 chips we need to use the queue ID to
 162                 * preserve the user mode ABI.
 163                 */
 164                q->doorbell_id = q->properties.queue_id;
 165        } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
 166                        q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
 167                /* For SDMA queues on SOC15 with 8-byte doorbell, use static
 168                 * doorbell assignments based on the engine and queue id.
  169                 * The doorbell index distance between RLC (2*i) and (2*i+1)
  170                 * for an SDMA engine is 512.
 171                 */
 172                uint32_t *idx_offset =
 173                                dev->shared_resources.sdma_doorbell_idx;
 174
 175                q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
 176                        + (q->properties.sdma_queue_id & 1)
 177                        * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
 178                        + (q->properties.sdma_queue_id >> 1);
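                     /* For example, with the 512-doorbell spacing described
                      * above, sdma_queue_id 5 (odd) ends up at
                      * idx_offset[engine] + 512 + 2.
                      */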
 179        } else {
 180                /* For CP queues on SOC15 reserve a free doorbell ID */
 181                unsigned int found;
 182
 183                found = find_first_zero_bit(qpd->doorbell_bitmap,
 184                                            KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
 185                if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
 186                        pr_debug("No doorbells available");
 187                        return -EBUSY;
 188                }
 189                set_bit(found, qpd->doorbell_bitmap);
 190                q->doorbell_id = found;
 191        }
 192
 193        q->properties.doorbell_off =
 194                kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
 195                                          q->doorbell_id);
 196        return 0;
 197}
 198
 199static void deallocate_doorbell(struct qcm_process_device *qpd,
 200                                struct queue *q)
 201{
 202        unsigned int old;
 203        struct kfd_dev *dev = qpd->dqm->dev;
 204
 205        if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
 206            q->properties.type == KFD_QUEUE_TYPE_SDMA ||
 207            q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
 208                return;
 209
 210        old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
 211        WARN_ON(!old);
 212}
 213
 214static void program_trap_handler_settings(struct device_queue_manager *dqm,
 215                                struct qcm_process_device *qpd)
 216{
 217        if (dqm->dev->kfd2kgd->program_trap_handler_settings)
 218                dqm->dev->kfd2kgd->program_trap_handler_settings(
 219                                                dqm->dev->kgd, qpd->vmid,
 220                                                qpd->tba_addr, qpd->tma_addr);
 221}
 222
 223static int allocate_vmid(struct device_queue_manager *dqm,
 224                        struct qcm_process_device *qpd,
 225                        struct queue *q)
 226{
 227        int allocated_vmid = -1, i;
 228
 229        for (i = dqm->dev->vm_info.first_vmid_kfd;
 230                        i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
 231                if (!dqm->vmid_pasid[i]) {
 232                        allocated_vmid = i;
 233                        break;
 234                }
 235        }
 236
 237        if (allocated_vmid < 0) {
 238                pr_err("no more vmid to allocate\n");
 239                return -ENOSPC;
 240        }
 241
 242        pr_debug("vmid allocated: %d\n", allocated_vmid);
 243
 244        dqm->vmid_pasid[allocated_vmid] = q->process->pasid;
 245
 246        set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);
 247
 248        qpd->vmid = allocated_vmid;
 249        q->properties.vmid = allocated_vmid;
 250
 251        program_sh_mem_settings(dqm, qpd);
 252
 253        if (dqm->dev->device_info->asic_family >= CHIP_VEGA10 &&
 254            dqm->dev->cwsr_enabled)
 255                program_trap_handler_settings(dqm, qpd);
 256
 257        /* qpd->page_table_base is set earlier when register_process()
 258         * is called, i.e. when the first queue is created.
 259         */
 260        dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
 261                        qpd->vmid,
 262                        qpd->page_table_base);
 263        /* invalidate the VM context after pasid and vmid mapping is set up */
 264        kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
 265
 266        if (dqm->dev->kfd2kgd->set_scratch_backing_va)
 267                dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->kgd,
 268                                qpd->sh_hidden_private_base, qpd->vmid);
 269
 270        return 0;
 271}
 272
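     /* Write a release_mem packet into the process's IB buffer and submit it
      * on the MEC1 engine to flush the texture cache for this VMID.
      */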
 273static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
 274                                struct qcm_process_device *qpd)
 275{
 276        const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
 277        int ret;
 278
 279        if (!qpd->ib_kaddr)
 280                return -ENOMEM;
 281
 282        ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
 283        if (ret)
 284                return ret;
 285
 286        return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
 287                                qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
 288                                pmf->release_mem_size / sizeof(uint32_t));
 289}
 290
 291static void deallocate_vmid(struct device_queue_manager *dqm,
 292                                struct qcm_process_device *qpd,
 293                                struct queue *q)
 294{
 295        /* On GFX v7, CP doesn't flush TC at dequeue */
 296        if (q->device->device_info->asic_family == CHIP_HAWAII)
 297                if (flush_texture_cache_nocpsch(q->device, qpd))
 298                        pr_err("Failed to flush TC\n");
 299
 300        kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);
 301
 302        /* Release the vmid mapping */
 303        set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
 304        dqm->vmid_pasid[qpd->vmid] = 0;
 305
 306        qpd->vmid = 0;
 307        q->properties.vmid = 0;
 308}
 309
 310static int create_queue_nocpsch(struct device_queue_manager *dqm,
 311                                struct queue *q,
 312                                struct qcm_process_device *qpd)
 313{
 314        struct mqd_manager *mqd_mgr;
 315        int retval;
 316
 317        dqm_lock(dqm);
 318
 319        if (dqm->total_queue_count >= max_num_of_queues_per_device) {
 320                pr_warn("Can't create new usermode queue because %d queues were already created\n",
 321                                dqm->total_queue_count);
 322                retval = -EPERM;
 323                goto out_unlock;
 324        }
 325
 326        if (list_empty(&qpd->queues_list)) {
 327                retval = allocate_vmid(dqm, qpd, q);
 328                if (retval)
 329                        goto out_unlock;
 330        }
 331        q->properties.vmid = qpd->vmid;
 332        /*
 333         * Eviction state logic: mark all queues as evicted, even ones
 334         * not currently active. Restoring inactive queues later only
 335         * updates the is_evicted flag but is a no-op otherwise.
 336         */
 337        q->properties.is_evicted = !!qpd->evicted;
 338
 339        q->properties.tba_addr = qpd->tba_addr;
 340        q->properties.tma_addr = qpd->tma_addr;
 341
 342        mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
 343                        q->properties.type)];
 344        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
 345                retval = allocate_hqd(dqm, q);
 346                if (retval)
 347                        goto deallocate_vmid;
 348                pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
 349                        q->pipe, q->queue);
 350        } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
 351                q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
 352                retval = allocate_sdma_queue(dqm, q);
 353                if (retval)
 354                        goto deallocate_vmid;
 355                dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
 356        }
 357
 358        retval = allocate_doorbell(qpd, q);
 359        if (retval)
 360                goto out_deallocate_hqd;
 361
 362        /* Temporarily release dqm lock to avoid a circular lock dependency */
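             /* allocate_mqd() allocates memory, which may recurse into
              * reclaim; reclaim must not run under the DQM lock (see the
              * note in register_process()).
              */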
 363        dqm_unlock(dqm);
 364        q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
 365        dqm_lock(dqm);
 366
 367        if (!q->mqd_mem_obj) {
 368                retval = -ENOMEM;
 369                goto out_deallocate_doorbell;
 370        }
 371        mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
 372                                &q->gart_mqd_addr, &q->properties);
 373        if (q->properties.is_active) {
 374                if (!dqm->sched_running) {
 375                        WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
 376                        goto add_queue_to_list;
 377                }
 378
 379                if (WARN(q->process->mm != current->mm,
 380                                        "should only run in user thread"))
 381                        retval = -EFAULT;
 382                else
 383                        retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
 384                                        q->queue, &q->properties, current->mm);
 385                if (retval)
 386                        goto out_free_mqd;
 387        }
 388
 389add_queue_to_list:
 390        list_add(&q->list, &qpd->queues_list);
 391        qpd->queue_count++;
 392        if (q->properties.is_active)
 393                increment_queue_count(dqm, q->properties.type);
 394
 395        /*
 396         * Unconditionally increment this counter, regardless of the queue's
 397         * type or whether the queue is active.
 398         */
 399        dqm->total_queue_count++;
 400        pr_debug("Total of %d queues are accountable so far\n",
 401                        dqm->total_queue_count);
 402        goto out_unlock;
 403
 404out_free_mqd:
 405        mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
 406out_deallocate_doorbell:
 407        deallocate_doorbell(qpd, q);
 408out_deallocate_hqd:
 409        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
 410                deallocate_hqd(dqm, q);
 411        else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
 412                q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
 413                deallocate_sdma_queue(dqm, q);
 414deallocate_vmid:
 415        if (list_empty(&qpd->queues_list))
 416                deallocate_vmid(dqm, qpd, q);
 417out_unlock:
 418        dqm_unlock(dqm);
 419        return retval;
 420}
 421
 422static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
 423{
 424        bool set;
 425        int pipe, bit, i;
 426
 427        set = false;
 428
 429        for (pipe = dqm->next_pipe_to_allocate, i = 0;
 430                        i < get_pipes_per_mec(dqm);
 431                        pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
 432
 433                if (!is_pipe_enabled(dqm, 0, pipe))
 434                        continue;
 435
 436                if (dqm->allocated_queues[pipe] != 0) {
 437                        bit = ffs(dqm->allocated_queues[pipe]) - 1;
 438                        dqm->allocated_queues[pipe] &= ~(1 << bit);
 439                        q->pipe = pipe;
 440                        q->queue = bit;
 441                        set = true;
 442                        break;
 443                }
 444        }
 445
 446        if (!set)
 447                return -EBUSY;
 448
 449        pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
 450        /* horizontal hqd allocation */
 451        dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
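             /* Round-robin: the next allocation starts on the following pipe
              * so HQDs are spread evenly across the MEC's pipes.
              */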
 452
 453        return 0;
 454}
 455
 456static inline void deallocate_hqd(struct device_queue_manager *dqm,
 457                                struct queue *q)
 458{
 459        dqm->allocated_queues[q->pipe] |= (1 << q->queue);
 460}
 461
 462/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
  463 * to avoid unsynchronized access
 464 */
 465static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
 466                                struct qcm_process_device *qpd,
 467                                struct queue *q)
 468{
 469        int retval;
 470        struct mqd_manager *mqd_mgr;
 471
 472        mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
 473                        q->properties.type)];
 474
 475        if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
 476                deallocate_hqd(dqm, q);
 477        else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
 478                deallocate_sdma_queue(dqm, q);
 479        else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
 480                deallocate_sdma_queue(dqm, q);
 481        else {
 482                pr_debug("q->properties.type %d is invalid\n",
 483                                q->properties.type);
 484                return -EINVAL;
 485        }
 486        dqm->total_queue_count--;
 487
 488        deallocate_doorbell(qpd, q);
 489
 490        if (!dqm->sched_running) {
 491                WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
 492                return 0;
 493        }
 494
 495        retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
 496                                KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
 497                                KFD_UNMAP_LATENCY_MS,
 498                                q->pipe, q->queue);
 499        if (retval == -ETIME)
 500                qpd->reset_wavefronts = true;
 501
 502        list_del(&q->list);
 503        if (list_empty(&qpd->queues_list)) {
 504                if (qpd->reset_wavefronts) {
 505                        pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
 506                                        dqm->dev);
 507                        /* dbgdev_wave_reset_wavefronts has to be called before
 508                         * deallocate_vmid(), i.e. when vmid is still in use.
 509                         */
 510                        dbgdev_wave_reset_wavefronts(dqm->dev,
 511                                        qpd->pqm->process);
 512                        qpd->reset_wavefronts = false;
 513                }
 514
 515                deallocate_vmid(dqm, qpd, q);
 516        }
 517        qpd->queue_count--;
 518        if (q->properties.is_active) {
 519                decrement_queue_count(dqm, q->properties.type);
 520                if (q->properties.is_gws) {
 521                        dqm->gws_queue_count--;
 522                        qpd->mapped_gws_queue = false;
 523                }
 524        }
 525
 526        return retval;
 527}
 528
 529static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
 530                                struct qcm_process_device *qpd,
 531                                struct queue *q)
 532{
 533        int retval;
 534        uint64_t sdma_val = 0;
 535        struct kfd_process_device *pdd = qpd_to_pdd(qpd);
 536        struct mqd_manager *mqd_mgr =
 537                dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];
 538
 539        /* Get the SDMA queue stats */
 540        if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
 541            (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
 542                retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
 543                                                        &sdma_val);
 544                if (retval)
 545                        pr_err("Failed to read SDMA queue counter for queue: %d\n",
 546                                q->properties.queue_id);
 547        }
 548
 549        dqm_lock(dqm);
 550        retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
 551        if (!retval)
 552                pdd->sdma_past_activity_counter += sdma_val;
 553        dqm_unlock(dqm);
 554
 555        mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
 556
 557        return retval;
 558}
 559
 560static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 561{
 562        int retval = 0;
 563        struct mqd_manager *mqd_mgr;
 564        struct kfd_process_device *pdd;
 565        bool prev_active = false;
 566
 567        dqm_lock(dqm);
 568        pdd = kfd_get_process_device_data(q->device, q->process);
 569        if (!pdd) {
 570                retval = -ENODEV;
 571                goto out_unlock;
 572        }
 573        mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
 574                        q->properties.type)];
 575
 576        /* Save previous activity state for counters */
 577        prev_active = q->properties.is_active;
 578
 579        /* Make sure the queue is unmapped before updating the MQD */
 580        if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
 581                retval = unmap_queues_cpsch(dqm,
 582                                KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
 583                if (retval) {
 584                        pr_err("unmap queue failed\n");
 585                        goto out_unlock;
 586                }
 587        } else if (prev_active &&
 588                   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
 589                    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
 590                    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
 591
 592                if (!dqm->sched_running) {
 593                        WARN_ONCE(1, "Update non-HWS queue while stopped\n");
 594                        goto out_unlock;
 595                }
 596
 597                retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
  598                                (dqm->dev->cwsr_enabled ?
  599                                 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
  600                                 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
 601                                KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
 602                if (retval) {
 603                        pr_err("destroy mqd failed\n");
 604                        goto out_unlock;
 605                }
 606        }
 607
 608        mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);
 609
 610        /*
 611         * check active state vs. the previous state and modify
 612         * counter accordingly. map_queues_cpsch uses the
 613         * dqm->active_queue_count to determine whether a new runlist must be
 614         * uploaded.
 615         */
 616        if (q->properties.is_active && !prev_active)
 617                increment_queue_count(dqm, q->properties.type);
 618        else if (!q->properties.is_active && prev_active)
 619                decrement_queue_count(dqm, q->properties.type);
 620
 621        if (q->gws && !q->properties.is_gws) {
 622                if (q->properties.is_active) {
 623                        dqm->gws_queue_count++;
 624                        pdd->qpd.mapped_gws_queue = true;
 625                }
 626                q->properties.is_gws = true;
 627        } else if (!q->gws && q->properties.is_gws) {
 628                if (q->properties.is_active) {
 629                        dqm->gws_queue_count--;
 630                        pdd->qpd.mapped_gws_queue = false;
 631                }
 632                q->properties.is_gws = false;
 633        }
 634
 635        if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
 636                retval = map_queues_cpsch(dqm);
 637        else if (q->properties.is_active &&
 638                 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
 639                  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
 640                  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
 641                if (WARN(q->process->mm != current->mm,
 642                         "should only run in user thread"))
 643                        retval = -EFAULT;
 644                else
 645                        retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
 646                                                   q->pipe, q->queue,
 647                                                   &q->properties, current->mm);
 648        }
 649
 650out_unlock:
 651        dqm_unlock(dqm);
 652        return retval;
 653}
 654
 655static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
 656                                        struct qcm_process_device *qpd)
 657{
 658        struct queue *q;
 659        struct mqd_manager *mqd_mgr;
 660        struct kfd_process_device *pdd;
 661        int retval, ret = 0;
 662
 663        dqm_lock(dqm);
 664        if (qpd->evicted++ > 0) /* already evicted, do nothing */
 665                goto out;
 666
 667        pdd = qpd_to_pdd(qpd);
 668        pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
 669                            pdd->process->pasid);
 670
 671        pdd->last_evict_timestamp = get_jiffies_64();
 672        /* Mark all queues as evicted. Deactivate all active queues on
 673         * the qpd.
 674         */
 675        list_for_each_entry(q, &qpd->queues_list, list) {
 676                q->properties.is_evicted = true;
 677                if (!q->properties.is_active)
 678                        continue;
 679
 680                mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
 681                                q->properties.type)];
 682                q->properties.is_active = false;
 683                decrement_queue_count(dqm, q->properties.type);
 684                if (q->properties.is_gws) {
 685                        dqm->gws_queue_count--;
 686                        qpd->mapped_gws_queue = false;
 687                }
 688
 689                if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
 690                        continue;
 691
 692                retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
  693                                (dqm->dev->cwsr_enabled ?
  694                                 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
  695                                 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
 696                                KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
 697                if (retval && !ret)
 698                        /* Return the first error, but keep going to
 699                         * maintain a consistent eviction state
 700                         */
 701                        ret = retval;
 702        }
 703
 704out:
 705        dqm_unlock(dqm);
 706        return ret;
 707}
 708
 709static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
 710                                      struct qcm_process_device *qpd)
 711{
 712        struct queue *q;
 713        struct kfd_process_device *pdd;
 714        int retval = 0;
 715
 716        dqm_lock(dqm);
 717        if (qpd->evicted++ > 0) /* already evicted, do nothing */
 718                goto out;
 719
 720        pdd = qpd_to_pdd(qpd);
 721        pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
 722                            pdd->process->pasid);
 723
 724        /* Mark all queues as evicted. Deactivate all active queues on
 725         * the qpd.
 726         */
 727        list_for_each_entry(q, &qpd->queues_list, list) {
 728                q->properties.is_evicted = true;
 729                if (!q->properties.is_active)
 730                        continue;
 731
 732                q->properties.is_active = false;
 733                decrement_queue_count(dqm, q->properties.type);
 734        }
 735        pdd->last_evict_timestamp = get_jiffies_64();
 736        retval = execute_queues_cpsch(dqm,
 737                                qpd->is_debug ?
 738                                KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
 739                                KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
 740
 741out:
 742        dqm_unlock(dqm);
 743        return retval;
 744}
 745
 746static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 747                                          struct qcm_process_device *qpd)
 748{
 749        struct mm_struct *mm = NULL;
 750        struct queue *q;
 751        struct mqd_manager *mqd_mgr;
 752        struct kfd_process_device *pdd;
 753        uint64_t pd_base;
 754        uint64_t eviction_duration;
 755        int retval, ret = 0;
 756
 757        pdd = qpd_to_pdd(qpd);
 758        /* Retrieve PD base */
 759        pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
 760
 761        dqm_lock(dqm);
 762        if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
 763                goto out;
 764        if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
 765                qpd->evicted--;
 766                goto out;
 767        }
 768
 769        pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
 770                            pdd->process->pasid);
 771
 772        /* Update PD Base in QPD */
 773        qpd->page_table_base = pd_base;
 774        pr_debug("Updated PD address to 0x%llx\n", pd_base);
 775
 776        if (!list_empty(&qpd->queues_list)) {
 777                dqm->dev->kfd2kgd->set_vm_context_page_table_base(
 778                                dqm->dev->kgd,
 779                                qpd->vmid,
 780                                qpd->page_table_base);
 781                kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
 782        }
 783
 784        /* Take a safe reference to the mm_struct, which may otherwise
 785         * disappear even while the kfd_process is still referenced.
 786         */
 787        mm = get_task_mm(pdd->process->lead_thread);
 788        if (!mm) {
 789                ret = -EFAULT;
 790                goto out;
 791        }
 792
 793        /* Remove the eviction flags. Activate queues that are not
 794         * inactive for other reasons.
 795         */
 796        list_for_each_entry(q, &qpd->queues_list, list) {
 797                q->properties.is_evicted = false;
 798                if (!QUEUE_IS_ACTIVE(q->properties))
 799                        continue;
 800
 801                mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
 802                                q->properties.type)];
 803                q->properties.is_active = true;
 804                increment_queue_count(dqm, q->properties.type);
 805                if (q->properties.is_gws) {
 806                        dqm->gws_queue_count++;
 807                        qpd->mapped_gws_queue = true;
 808                }
 809
 810                if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
 811                        continue;
 812
 813                retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
 814                                       q->queue, &q->properties, mm);
 815                if (retval && !ret)
 816                        /* Return the first error, but keep going to
 817                         * maintain a consistent eviction state
 818                         */
 819                        ret = retval;
 820        }
 821        qpd->evicted = 0;
 822        eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
 823        atomic64_add(eviction_duration, &pdd->evict_duration_counter);
 824out:
 825        if (mm)
 826                mmput(mm);
 827        dqm_unlock(dqm);
 828        return ret;
 829}
 830
 831static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
 832                                        struct qcm_process_device *qpd)
 833{
 834        struct queue *q;
 835        struct kfd_process_device *pdd;
 836        uint64_t pd_base;
 837        uint64_t eviction_duration;
 838        int retval = 0;
 839
 840        pdd = qpd_to_pdd(qpd);
 841        /* Retrieve PD base */
 842        pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
 843
 844        dqm_lock(dqm);
 845        if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
 846                goto out;
 847        if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
 848                qpd->evicted--;
 849                goto out;
 850        }
 851
 852        pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
 853                            pdd->process->pasid);
 854
 855        /* Update PD Base in QPD */
 856        qpd->page_table_base = pd_base;
 857        pr_debug("Updated PD address to 0x%llx\n", pd_base);
 858
 859        /* activate all active queues on the qpd */
 860        list_for_each_entry(q, &qpd->queues_list, list) {
 861                q->properties.is_evicted = false;
 862                if (!QUEUE_IS_ACTIVE(q->properties))
 863                        continue;
 864
 865                q->properties.is_active = true;
 866                increment_queue_count(dqm, q->properties.type);
 867        }
 868        retval = execute_queues_cpsch(dqm,
 869                                KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
 870        qpd->evicted = 0;
 871        eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
 872        atomic64_add(eviction_duration, &pdd->evict_duration_counter);
 873out:
 874        dqm_unlock(dqm);
 875        return retval;
 876}
 877
 878static int register_process(struct device_queue_manager *dqm,
 879                                        struct qcm_process_device *qpd)
 880{
 881        struct device_process_node *n;
 882        struct kfd_process_device *pdd;
 883        uint64_t pd_base;
 884        int retval;
 885
 886        n = kzalloc(sizeof(*n), GFP_KERNEL);
 887        if (!n)
 888                return -ENOMEM;
 889
 890        n->qpd = qpd;
 891
 892        pdd = qpd_to_pdd(qpd);
 893        /* Retrieve PD base */
 894        pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
 895
 896        dqm_lock(dqm);
 897        list_add(&n->list, &dqm->queues);
 898
 899        /* Update PD Base in QPD */
 900        qpd->page_table_base = pd_base;
 901        pr_debug("Updated PD address to 0x%llx\n", pd_base);
 902
 903        retval = dqm->asic_ops.update_qpd(dqm, qpd);
 904
 905        dqm->processes_count++;
 906
 907        dqm_unlock(dqm);
 908
 909        /* Outside the DQM lock because under the DQM lock we can't do
 910         * reclaim or take other locks that others hold while reclaiming.
 911         */
 912        kfd_inc_compute_active(dqm->dev);
 913
 914        return retval;
 915}
 916
 917static int unregister_process(struct device_queue_manager *dqm,
 918                                        struct qcm_process_device *qpd)
 919{
 920        int retval;
 921        struct device_process_node *cur, *next;
 922
 923        pr_debug("qpd->queues_list is %s\n",
 924                        list_empty(&qpd->queues_list) ? "empty" : "not empty");
 925
 926        retval = 0;
 927        dqm_lock(dqm);
 928
 929        list_for_each_entry_safe(cur, next, &dqm->queues, list) {
 930                if (qpd == cur->qpd) {
 931                        list_del(&cur->list);
 932                        kfree(cur);
 933                        dqm->processes_count--;
 934                        goto out;
 935                }
 936        }
 937        /* qpd not found in dqm list */
 938        retval = 1;
 939out:
 940        dqm_unlock(dqm);
 941
 942        /* Outside the DQM lock because under the DQM lock we can't do
 943         * reclaim or take other locks that others hold while reclaiming.
 944         */
 945        if (!retval)
 946                kfd_dec_compute_active(dqm->dev);
 947
 948        return retval;
 949}
 950
 951static int
 952set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
 953                        unsigned int vmid)
 954{
 955        return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
 956                                                dqm->dev->kgd, pasid, vmid);
 957}
 958
 959static void init_interrupts(struct device_queue_manager *dqm)
 960{
 961        unsigned int i;
 962
 963        for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
 964                if (is_pipe_enabled(dqm, 0, i))
 965                        dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
 966}
 967
 968static int initialize_nocpsch(struct device_queue_manager *dqm)
 969{
 970        int pipe, queue;
 971
 972        pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
 973
 974        dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
 975                                        sizeof(unsigned int), GFP_KERNEL);
 976        if (!dqm->allocated_queues)
 977                return -ENOMEM;
 978
 979        mutex_init(&dqm->lock_hidden);
 980        INIT_LIST_HEAD(&dqm->queues);
 981        dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
 982        dqm->active_cp_queue_count = 0;
 983        dqm->gws_queue_count = 0;
 984
 985        for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
 986                int pipe_offset = pipe * get_queues_per_pipe(dqm);
 987
 988                for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
 989                        if (test_bit(pipe_offset + queue,
 990                                     dqm->dev->shared_resources.cp_queue_bitmap))
 991                                dqm->allocated_queues[pipe] |= 1 << queue;
 992        }
 993
 994        memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));
 995
 996        dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
 997        dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
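             /* ~0ULL >> (64 - n) sets the low n bits, i.e. one free bit per
              * available SDMA queue (e.g. n == 8 gives 0xff).
              */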
 998
 999        return 0;
1000}
1001
1002static void uninitialize(struct device_queue_manager *dqm)
1003{
1004        int i;
1005
1006        WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
1007
1008        kfree(dqm->allocated_queues);
1009        for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
1010                kfree(dqm->mqd_mgrs[i]);
1011        mutex_destroy(&dqm->lock_hidden);
1012}
1013
1014static int start_nocpsch(struct device_queue_manager *dqm)
1015{
1016        pr_info("SW scheduler is used");
1017        init_interrupts(dqm);
 1018
1019        if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
1020                return pm_init(&dqm->packet_mgr, dqm);
1021        dqm->sched_running = true;
1022
1023        return 0;
1024}
1025
1026static int stop_nocpsch(struct device_queue_manager *dqm)
1027{
1028        if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
1029                pm_uninit(&dqm->packet_mgr, false);
1030        dqm->sched_running = false;
1031
1032        return 0;
1033}
1034
1035static void pre_reset(struct device_queue_manager *dqm)
1036{
1037        dqm_lock(dqm);
1038        dqm->is_resetting = true;
1039        dqm_unlock(dqm);
1040}
1041
1042static int allocate_sdma_queue(struct device_queue_manager *dqm,
1043                                struct queue *q)
1044{
1045        int bit;
1046
1047        if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1048                if (dqm->sdma_bitmap == 0) {
1049                        pr_err("No more SDMA queue to allocate\n");
1050                        return -ENOMEM;
1051                }
1052
1053                bit = __ffs64(dqm->sdma_bitmap);
1054                dqm->sdma_bitmap &= ~(1ULL << bit);
1055                q->sdma_id = bit;
1056                q->properties.sdma_engine_id = q->sdma_id %
1057                                get_num_sdma_engines(dqm);
1058                q->properties.sdma_queue_id = q->sdma_id /
1059                                get_num_sdma_engines(dqm);
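                     /* Illustration (assuming 2 PCIe-optimized SDMA engines):
                      * sdma_id 5 maps to engine 1, queue 2 (5 % 2, 5 / 2).
                      */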
1060        } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1061                if (dqm->xgmi_sdma_bitmap == 0) {
1062                        pr_err("No more XGMI SDMA queue to allocate\n");
1063                        return -ENOMEM;
1064                }
1065                bit = __ffs64(dqm->xgmi_sdma_bitmap);
1066                dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
1067                q->sdma_id = bit;
 1068                /* sdma_engine_id is an SDMA ID that counts
 1069                 * both PCIe-optimized and XGMI-optimized
 1070                 * SDMA engines. The calculation below assumes
 1071                 * the first N engines are always the
 1072                 * PCIe-optimized ones.
 1073                 */
1074                q->properties.sdma_engine_id = get_num_sdma_engines(dqm) +
1075                                q->sdma_id % get_num_xgmi_sdma_engines(dqm);
1076                q->properties.sdma_queue_id = q->sdma_id /
1077                                get_num_xgmi_sdma_engines(dqm);
1078        }
1079
1080        pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
1081        pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
1082
1083        return 0;
1084}
1085
1086static void deallocate_sdma_queue(struct device_queue_manager *dqm,
1087                                struct queue *q)
1088{
1089        if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
1090                if (q->sdma_id >= get_num_sdma_queues(dqm))
1091                        return;
1092                dqm->sdma_bitmap |= (1ULL << q->sdma_id);
1093        } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1094                if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
1095                        return;
1096                dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
1097        }
1098}
1099
1100/*
1101 * Device Queue Manager implementation for cp scheduler
1102 */
1103
1104static int set_sched_resources(struct device_queue_manager *dqm)
1105{
1106        int i, mec;
1107        struct scheduling_resources res;
1108
1109        res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
1110
1111        res.queue_mask = 0;
1112        for (i = 0; i < KGD_MAX_QUEUES; ++i) {
1113                mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
1114                        / dqm->dev->shared_resources.num_pipe_per_mec;
1115
1116                if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap))
1117                        continue;
1118
1119                /* only acquire queues from the first MEC */
1120                if (mec > 0)
1121                        continue;
1122
1123                /* This situation may be hit in the future if a new HW
1124                 * generation exposes more than 64 queues. If so, the
1125                 * definition of res.queue_mask needs updating
1126                 */
1127                if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
1128                        pr_err("Invalid queue enabled by amdgpu: %d\n", i);
1129                        break;
1130                }
1131
1132                res.queue_mask |= 1ull
1133                        << amdgpu_queue_mask_bit_to_set_resource_bit(
1134                                (struct amdgpu_device *)dqm->dev->kgd, i);
1135        }
1136        res.gws_mask = ~0ull;
1137        res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;
1138
1139        pr_debug("Scheduling resources:\n"
1140                        "vmid mask: 0x%8X\n"
1141                        "queue mask: 0x%8llX\n",
1142                        res.vmid_mask, res.queue_mask);
1143
1144        return pm_send_set_resources(&dqm->packet_mgr, &res);
1145}
1146
1147static int initialize_cpsch(struct device_queue_manager *dqm)
1148{
1149        uint64_t num_sdma_queues;
1150        uint64_t num_xgmi_sdma_queues;
1151
1152        pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));
1153
1154        mutex_init(&dqm->lock_hidden);
1155        INIT_LIST_HEAD(&dqm->queues);
1156        dqm->active_queue_count = dqm->processes_count = 0;
1157        dqm->active_cp_queue_count = 0;
1158        dqm->gws_queue_count = 0;
1159        dqm->active_runlist = false;
1160
1161        num_sdma_queues = get_num_sdma_queues(dqm);
1162        if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap))
1163                dqm->sdma_bitmap = ULLONG_MAX;
1164        else
1165                dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);
1166
1167        num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
1168        if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap))
1169                dqm->xgmi_sdma_bitmap = ULLONG_MAX;
1170        else
1171                dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1);
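             /* BIT_ULL(n) - 1 sets the low n bits; the >= BITS_PER_TYPE checks
              * above avoid an undefined shift when all 64 bits are needed.
              */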
1172
1173        INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
1174
1175        return 0;
1176}
1177
1178static int start_cpsch(struct device_queue_manager *dqm)
1179{
1180        int retval;
1181
1182        retval = 0;
1183
1184        dqm_lock(dqm);
1185        retval = pm_init(&dqm->packet_mgr, dqm);
1186        if (retval)
1187                goto fail_packet_manager_init;
1188
1189        retval = set_sched_resources(dqm);
1190        if (retval)
1191                goto fail_set_sched_resources;
1192
1193        pr_debug("Allocating fence memory\n");
1194
1195        /* allocate fence memory on the gart */
1196        retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
1197                                        &dqm->fence_mem);
1198
1199        if (retval)
1200                goto fail_allocate_vidmem;
1201
1202        dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
1203        dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
1204
1205        init_interrupts(dqm);
1206
 1207        /* Clear the hang status when the driver tries to start the HW scheduler */
1208        dqm->is_hws_hang = false;
1209        dqm->is_resetting = false;
1210        dqm->sched_running = true;
1211        execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1212        dqm_unlock(dqm);
1213
1214        return 0;
1215fail_allocate_vidmem:
1216fail_set_sched_resources:
1217        pm_uninit(&dqm->packet_mgr, false);
1218fail_packet_manager_init:
1219        dqm_unlock(dqm);
1220        return retval;
1221}
1222
1223static int stop_cpsch(struct device_queue_manager *dqm)
1224{
1225        bool hanging;
1226
1227        dqm_lock(dqm);
1228        if (!dqm->is_hws_hang)
1229                unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1230        hanging = dqm->is_hws_hang || dqm->is_resetting;
1231        dqm->sched_running = false;
1232
1233        pm_release_ib(&dqm->packet_mgr);
1234
1235        kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
1236        pm_uninit(&dqm->packet_mgr, hanging);
1237        dqm_unlock(dqm);
1238
1239        return 0;
1240}
1241
1242static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
1243                                        struct kernel_queue *kq,
1244                                        struct qcm_process_device *qpd)
1245{
1246        dqm_lock(dqm);
1247        if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1248                pr_warn("Can't create new kernel queue because %d queues were already created\n",
1249                                dqm->total_queue_count);
1250                dqm_unlock(dqm);
1251                return -EPERM;
1252        }
1253
1254        /*
1255         * Unconditionally increment this counter, regardless of the queue's
1256         * type or whether the queue is active.
1257         */
1258        dqm->total_queue_count++;
1259        pr_debug("Total of %d queues are accountable so far\n",
1260                        dqm->total_queue_count);
1261
1262        list_add(&kq->list, &qpd->priv_queue_list);
1263        increment_queue_count(dqm, kq->queue->properties.type);
1264        qpd->is_debug = true;
1265        execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1266        dqm_unlock(dqm);
1267
1268        return 0;
1269}
1270
1271static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
1272                                        struct kernel_queue *kq,
1273                                        struct qcm_process_device *qpd)
1274{
1275        dqm_lock(dqm);
1276        list_del(&kq->list);
1277        decrement_queue_count(dqm, kq->queue->properties.type);
1278        qpd->is_debug = false;
1279        execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
1280        /*
1281         * Unconditionally decrement this counter, regardless of the queue's
1282         * type.
1283         */
1284        dqm->total_queue_count--;
1285        pr_debug("Total of %d queues are accountable so far\n",
1286                        dqm->total_queue_count);
1287        dqm_unlock(dqm);
1288}
1289
1290static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
1291                        struct qcm_process_device *qpd)
1292{
1293        int retval;
1294        struct mqd_manager *mqd_mgr;
1295
1296        if (dqm->total_queue_count >= max_num_of_queues_per_device) {
1297                pr_warn("Can't create new usermode queue because %d queues were already created\n",
1298                                dqm->total_queue_count);
1299                retval = -EPERM;
1300                goto out;
1301        }
1302
1303        if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1304                q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1305                dqm_lock(dqm);
1306                retval = allocate_sdma_queue(dqm, q);
1307                dqm_unlock(dqm);
1308                if (retval)
1309                        goto out;
1310        }
1311
1312        retval = allocate_doorbell(qpd, q);
1313        if (retval)
1314                goto out_deallocate_sdma_queue;
1315
1316        mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1317                        q->properties.type)];
1318
1319        if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1320                q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
1321                dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
1322        q->properties.tba_addr = qpd->tba_addr;
1323        q->properties.tma_addr = qpd->tma_addr;
1324        q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
1325        if (!q->mqd_mem_obj) {
1326                retval = -ENOMEM;
1327                goto out_deallocate_doorbell;
1328        }
1329
1330        dqm_lock(dqm);
1331        /*
1332         * Eviction state logic: mark all queues as evicted, even ones
1333         * not currently active. Restoring inactive queues later only
1334         * updates the is_evicted flag but is a no-op otherwise.
1335         */
1336        q->properties.is_evicted = !!qpd->evicted;
1337        mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
1338                                &q->gart_mqd_addr, &q->properties);
1339
1340        list_add(&q->list, &qpd->queues_list);
1341        qpd->queue_count++;
1342
1343        if (q->properties.is_active) {
1344                increment_queue_count(dqm, q->properties.type);
1345
1346                execute_queues_cpsch(dqm,
1347                                KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1348        }
1349
1350        /*
1351         * Unconditionally increment this counter, regardless of the queue's
1352         * type or whether the queue is active.
1353         */
1354        dqm->total_queue_count++;
1355
1356        pr_debug("Total of %d queues are accountable so far\n",
1357                        dqm->total_queue_count);
1358
1359        dqm_unlock(dqm);
1360        return retval;
1361
1362out_deallocate_doorbell:
1363        deallocate_doorbell(qpd, q);
1364out_deallocate_sdma_queue:
1365        if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
1366                q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
1367                dqm_lock(dqm);
1368                deallocate_sdma_queue(dqm, q);
1369                dqm_unlock(dqm);
1370        }
1371out:
1372        return retval;
1373}
1374
1375int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
1376                                uint64_t fence_value,
1377                                unsigned int timeout_ms)
1378{
1379        unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
1380
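             /* Poll the fence word in GTT, yielding the CPU between reads,
              * until it reaches fence_value or the timeout elapses.
              */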
1381        while (*fence_addr != fence_value) {
1382                if (time_after(jiffies, end_jiffies)) {
1383                        pr_err("qcm fence wait loop timeout expired\n");
1384                        /* In HWS case, this is used to halt the driver thread
1385                         * in order not to mess up CP states before doing
1386                         * scandumps for FW debugging.
1387                         */
1388                        while (halt_if_hws_hang)
1389                                schedule();
1390
1391                        return -ETIME;
1392                }
1393                schedule();
1394        }
1395
1396        return 0;
1397}
1398
1399/* dqm->lock mutex has to be locked before calling this function */
1400static int map_queues_cpsch(struct device_queue_manager *dqm)
1401{
1402        int retval;
1403
1404        if (!dqm->sched_running)
1405                return 0;
1406        if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
1407                return 0;
1408        if (dqm->active_runlist)
1409                return 0;
1410
1411        retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
1412        pr_debug("%s sent runlist\n", __func__);
1413        if (retval) {
1414                pr_err("failed to execute runlist\n");
1415                return retval;
1416        }
1417        dqm->active_runlist = true;
1418
1419        return retval;
1420}
1421
1422/* dqm->lock mutex has to be locked before calling this function */
1423static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1424                                enum kfd_unmap_queues_filter filter,
1425                                uint32_t filter_param)
1426{
1427        int retval = 0;
1428        struct mqd_manager *mqd_mgr;
1429
1430        if (!dqm->sched_running)
1431                return 0;
1432        if (dqm->is_hws_hang)
1433                return -EIO;
1434        if (!dqm->active_runlist)
1435                return retval;
1436
1437        retval = pm_send_unmap_queue(&dqm->packet_mgr, KFD_QUEUE_TYPE_COMPUTE,
1438                        filter, filter_param, false, 0);
1439        if (retval)
1440                return retval;
1441
1442        *dqm->fence_addr = KFD_FENCE_INIT;
1443        pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
1444                                KFD_FENCE_COMPLETED);
1445        /* Wait for the CP to write the fence value, bounded by a timeout */
1446        retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1447                                queue_preemption_timeout_ms);
1448        if (retval) {
1449                pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
1450                dqm->is_hws_hang = true;
1451                /* It's possible we're detecting a HWS hang in the
1452                 * middle of a GPU reset. No need to schedule another
1453                 * reset in this case.
1454                 */
1455                if (!dqm->is_resetting)
1456                        schedule_work(&dqm->hw_exception_work);
1457                return retval;
1458        }
1459
1460        /* In the current MEC firmware implementation, if a compute queue
1461         * doesn't respond to the preemption request in time, the HIQ will
1462         * abandon the unmap request without returning any timeout error
1463         * to the driver. Instead, MEC firmware logs the doorbell of the
1464         * unresponsive compute queue in the HIQ.MQD.queue_doorbell_id
1465         * fields. To make sure the queue unmap was successful, the driver
1466         * needs to check those fields.
1467         */
1468        mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
1469        if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
1470                pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
1471                while (halt_if_hws_hang)
1472                        schedule();
1473                return -ETIME;
1474        }
1475
1476        pm_release_ib(&dqm->packet_mgr);
1477        dqm->active_runlist = false;
1478
1479        return retval;
1480}
1481
1482/* dqm->lock mutex has to be locked before calling this function */
1483static int execute_queues_cpsch(struct device_queue_manager *dqm,
1484                                enum kfd_unmap_queues_filter filter,
1485                                uint32_t filter_param)
1486{
1487        int retval;
1488
1489        if (dqm->is_hws_hang)
1490                return -EIO;
1491        retval = unmap_queues_cpsch(dqm, filter, filter_param);
1492        if (retval)
1493                return retval;
1494
1495        return map_queues_cpsch(dqm);
1496}
1497
1498static int destroy_queue_cpsch(struct device_queue_manager *dqm,
1499                                struct qcm_process_device *qpd,
1500                                struct queue *q)
1501{
1502        int retval;
1503        struct mqd_manager *mqd_mgr;
1504        uint64_t sdma_val = 0;
1505        struct kfd_process_device *pdd = qpd_to_pdd(qpd);
1506
1507        /* Get the SDMA queue stats */
1508        if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1509            (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1510                retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
1511                                                        &sdma_val);
1512                if (retval)
1513                        pr_err("Failed to read SDMA queue counter for queue: %d\n",
1514                                q->properties.queue_id);
1515        }
1516
1517        retval = 0;
1518
1519        /* remove queue from list to prevent rescheduling after preemption */
1520        dqm_lock(dqm);
1521
1522        if (qpd->is_debug) {
1523                /*
1524                 * Error: destroying a queue of a process that is
1525                 * currently being debugged is not allowed.
1526                 */
1527                retval = -EBUSY;
1528                goto failed_try_destroy_debugged_queue;
1529
1530        }
1531
1532        mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1533                        q->properties.type)];
1534
1535        deallocate_doorbell(qpd, q);
1536
1537        if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1538            (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
1539                deallocate_sdma_queue(dqm, q);
1540                pdd->sdma_past_activity_counter += sdma_val;
1541        }
1542
1543        list_del(&q->list);
1544        qpd->queue_count--;
1545        if (q->properties.is_active) {
1546                decrement_queue_count(dqm, q->properties.type);
1547                retval = execute_queues_cpsch(dqm,
1548                                KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
1549                if (retval == -ETIME)
1550                        qpd->reset_wavefronts = true;
1551                if (q->properties.is_gws) {
1552                        dqm->gws_queue_count--;
1553                        qpd->mapped_gws_queue = false;
1554                }
1555        }
1556
1557        /*
1558         * Unconditionally decrement this counter, regardless of the queue's
1559         * type
1560         */
1561        dqm->total_queue_count--;
1562        pr_debug("Total of %d queues are accountable so far\n",
1563                        dqm->total_queue_count);
1564
1565        dqm_unlock(dqm);
1566
1567        /* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
1568        mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1569
1570        return retval;
1571
1572failed_try_destroy_debugged_queue:
1573
1574        dqm_unlock(dqm);
1575        return retval;
1576}
1577
1578/*
1579 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
1580 * stay in user mode.
1581 */
1582#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
1583/* APE1 limit is inclusive and 64K aligned. */
1584#define APE1_LIMIT_ALIGNMENT 0xFFFF
1585
1586static bool set_cache_memory_policy(struct device_queue_manager *dqm,
1587                                   struct qcm_process_device *qpd,
1588                                   enum cache_policy default_policy,
1589                                   enum cache_policy alternate_policy,
1590                                   void __user *alternate_aperture_base,
1591                                   uint64_t alternate_aperture_size)
1592{
1593        bool retval = true;
1594
1595        if (!dqm->asic_ops.set_cache_memory_policy)
1596                return retval;
1597
1598        dqm_lock(dqm);
1599
1600        if (alternate_aperture_size == 0) {
1601                /* base > limit disables APE1 */
1602                qpd->sh_mem_ape1_base = 1;
1603                qpd->sh_mem_ape1_limit = 0;
1604        } else {
1605                /*
1606                 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
1607                 *                      SH_MEM_APE1_BASE[31:0], 0x0000 }
1608                 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
1609                 *                      SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
1610                 * Verify that the base and size parameters can be
1611                 * represented in this format and convert them.
1612                 * Additionally restrict APE1 to user-mode addresses.
1613                 */
1614
1615                uint64_t base = (uintptr_t)alternate_aperture_base;
1616                uint64_t limit = base + alternate_aperture_size - 1;
1617
1618                if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
1619                   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
1620                        retval = false;
1621                        goto out;
1622                }
1623
1624                qpd->sh_mem_ape1_base = base >> 16;
1625                qpd->sh_mem_ape1_limit = limit >> 16;
1626        }
1627
1628        retval = dqm->asic_ops.set_cache_memory_policy(
1629                        dqm,
1630                        qpd,
1631                        default_policy,
1632                        alternate_policy,
1633                        alternate_aperture_base,
1634                        alternate_aperture_size);
1635
1636        if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
1637                program_sh_mem_settings(dqm, qpd);
1638
1639        pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
1640                qpd->sh_mem_config, qpd->sh_mem_ape1_base,
1641                qpd->sh_mem_ape1_limit);
1642
1643out:
1644        dqm_unlock(dqm);
1645        return retval;
1646}
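/*
 * Worked example of the APE1 encoding above (values chosen purely for
 * illustration): a request with alternate_aperture_base = 0x100000000
 * and alternate_aperture_size = 0x10000 yields
 *
 *	base  = 0x0000000100000000   (low 16 bits 0x0000)
 *	limit = 0x000000010000FFFF   (low 16 bits 0xFFFF)
 *
 * Both satisfy the APE1_FIXED_BITS_MASK check, so the registers become
 *
 *	qpd->sh_mem_ape1_base  = base  >> 16 = 0x00010000
 *	qpd->sh_mem_ape1_limit = limit >> 16 = 0x00010000
 *
 * and the hardware re-expands them to the 64-bit pair described in the
 * FSA64 comment. A base or size that is not 64K aligned (or a base
 * outside the user-mode range) fails the mask check and
 * set_cache_memory_policy() returns false.
 */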
1647
1648static int process_termination_nocpsch(struct device_queue_manager *dqm,
1649                struct qcm_process_device *qpd)
1650{
1651        struct queue *q;
1652        struct device_process_node *cur, *next_dpn;
1653        int retval = 0;
1654        bool found = false;
1655
1656        dqm_lock(dqm);
1657
1658        /* Clear all user mode queues */
1659        while (!list_empty(&qpd->queues_list)) {
1660                struct mqd_manager *mqd_mgr;
1661                int ret;
1662
1663                q = list_first_entry(&qpd->queues_list, struct queue, list);
1664                mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1665                                q->properties.type)];
1666                ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
1667                if (ret)
1668                        retval = ret;
1669                dqm_unlock(dqm);
1670                mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1671                dqm_lock(dqm);
1672        }
1673
1674        /* Unregister process */
1675        list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1676                if (qpd == cur->qpd) {
1677                        list_del(&cur->list);
1678                        kfree(cur);
1679                        dqm->processes_count--;
1680                        found = true;
1681                        break;
1682                }
1683        }
1684
1685        dqm_unlock(dqm);
1686
1687        /* Outside the DQM lock because under the DQM lock we can't do
1688         * reclaim or take other locks that others hold while reclaiming.
1689         */
1690        if (found)
1691                kfd_dec_compute_active(dqm->dev);
1692
1693        return retval;
1694}
1695
1696static int get_wave_state(struct device_queue_manager *dqm,
1697                          struct queue *q,
1698                          void __user *ctl_stack,
1699                          u32 *ctl_stack_used_size,
1700                          u32 *save_area_used_size)
1701{
1702        struct mqd_manager *mqd_mgr;
1703
1704        dqm_lock(dqm);
1705
1706        mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
1707
1708        if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
1709            q->properties.is_active || !q->device->cwsr_enabled ||
1710            !mqd_mgr->get_wave_state) {
1711                dqm_unlock(dqm);
1712                return -EINVAL;
1713        }
1714
1715        dqm_unlock(dqm);
1716
1717        /*
1718         * get_wave_state is called outside the dqm lock to prevent circular
1719         * locking; the queue is protected against destruction by the
1720         * process lock.
1721         */
1722        return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
1723                        ctl_stack_used_size, save_area_used_size);
1724}
1725
1726static int process_termination_cpsch(struct device_queue_manager *dqm,
1727                struct qcm_process_device *qpd)
1728{
1729        int retval;
1730        struct queue *q;
1731        struct kernel_queue *kq, *kq_next;
1732        struct mqd_manager *mqd_mgr;
1733        struct device_process_node *cur, *next_dpn;
1734        enum kfd_unmap_queues_filter filter =
1735                KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
1736        bool found = false;
1737
1738        retval = 0;
1739
1740        dqm_lock(dqm);
1741
1742        /* Clean all kernel queues */
1743        list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
1744                list_del(&kq->list);
1745                decrement_queue_count(dqm, kq->queue->properties.type);
1746                qpd->is_debug = false;
1747                dqm->total_queue_count--;
1748                filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
1749        }
1750
1751        /* Clear all user mode queues */
1752        list_for_each_entry(q, &qpd->queues_list, list) {
1753                if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
1754                    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI))
1755                        deallocate_sdma_queue(dqm, q);
1757
1758                if (q->properties.is_active) {
1759                        decrement_queue_count(dqm, q->properties.type);
1760                        if (q->properties.is_gws) {
1761                                dqm->gws_queue_count--;
1762                                qpd->mapped_gws_queue = false;
1763                        }
1764                }
1765
1766                dqm->total_queue_count--;
1767        }
1768
1769        /* Unregister process */
1770        list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
1771                if (qpd == cur->qpd) {
1772                        list_del(&cur->list);
1773                        kfree(cur);
1774                        dqm->processes_count--;
1775                        found = true;
1776                        break;
1777                }
1778        }
1779
1780        retval = execute_queues_cpsch(dqm, filter, 0);
1781        if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
1782                pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
1783                dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
1784                qpd->reset_wavefronts = false;
1785        }
1786
1787        /* Lastly, free mqd resources.
1788         * Do free_mqd() after dqm_unlock to avoid circular locking.
1789         */
1790        while (!list_empty(&qpd->queues_list)) {
1791                q = list_first_entry(&qpd->queues_list, struct queue, list);
1792                mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
1793                                q->properties.type)];
1794                list_del(&q->list);
1795                qpd->queue_count--;
1796                dqm_unlock(dqm);
1797                mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
1798                dqm_lock(dqm);
1799        }
1800        dqm_unlock(dqm);
1801
1802        /* Outside the DQM lock because under the DQM lock we can't do
1803         * reclaim or take other locks that others hold while reclaiming.
1804         */
1805        if (found)
1806                kfd_dec_compute_active(dqm->dev);
1807
1808        return retval;
1809}
1810
1811static int init_mqd_managers(struct device_queue_manager *dqm)
1812{
1813        int i, j;
1814        struct mqd_manager *mqd_mgr;
1815
1816        for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
1817                mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
1818                if (!mqd_mgr) {
1819                        pr_err("mqd manager [%d] initialization failed\n", i);
1820                        goto out_free;
1821                }
1822                dqm->mqd_mgrs[i] = mqd_mgr;
1823        }
1824
1825        return 0;
1826
1827out_free:
1828        for (j = 0; j < i; j++) {
1829                kfree(dqm->mqd_mgrs[j]);
1830                dqm->mqd_mgrs[j] = NULL;
1831        }
1832
1833        return -ENOMEM;
1834}
1835
1836/* Allocate one HIQ MQD (HWS) and all SDMA MQDs in one contiguous chunk */
1837static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
1838{
1839        int retval;
1840        struct kfd_dev *dev = dqm->dev;
1841        struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
1842        uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
1843                get_num_all_sdma_engines(dqm) *
1844                dev->device_info->num_sdma_queues_per_engine +
1845                dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;
1846
1847        retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
1848                &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
1849                (void *)&(mem_obj->cpu_ptr), false);
1850
1851        return retval;
1852}
1853
1854struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
1855{
1856        struct device_queue_manager *dqm;
1857
1858        pr_debug("Loading device queue manager\n");
1859
1860        dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
1861        if (!dqm)
1862                return NULL;
1863
1864        switch (dev->device_info->asic_family) {
1865        /* HWS is not available on Hawaii. */
1866        case CHIP_HAWAII:
1867        /* HWS depends on CWSR for timely dequeue. CWSR is not
1868         * available on Tonga.
1869         *
1870         * FIXME: This argument also applies to Kaveri.
1871         */
1872        case CHIP_TONGA:
1873                dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
1874                break;
1875        default:
1876                dqm->sched_policy = sched_policy;
1877                break;
1878        }
1879
1880        dqm->dev = dev;
1881        switch (dqm->sched_policy) {
1882        case KFD_SCHED_POLICY_HWS:
1883        case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
1884                /* initialize dqm for cp scheduling */
1885                dqm->ops.create_queue = create_queue_cpsch;
1886                dqm->ops.initialize = initialize_cpsch;
1887                dqm->ops.start = start_cpsch;
1888                dqm->ops.stop = stop_cpsch;
1889                dqm->ops.pre_reset = pre_reset;
1890                dqm->ops.destroy_queue = destroy_queue_cpsch;
1891                dqm->ops.update_queue = update_queue;
1892                dqm->ops.register_process = register_process;
1893                dqm->ops.unregister_process = unregister_process;
1894                dqm->ops.uninitialize = uninitialize;
1895                dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
1896                dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
1897                dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1898                dqm->ops.process_termination = process_termination_cpsch;
1899                dqm->ops.evict_process_queues = evict_process_queues_cpsch;
1900                dqm->ops.restore_process_queues = restore_process_queues_cpsch;
1901                dqm->ops.get_wave_state = get_wave_state;
1902                break;
1903        case KFD_SCHED_POLICY_NO_HWS:
1904                /* initialize dqm for no cp scheduling */
1905                dqm->ops.start = start_nocpsch;
1906                dqm->ops.stop = stop_nocpsch;
1907                dqm->ops.pre_reset = pre_reset;
1908                dqm->ops.create_queue = create_queue_nocpsch;
1909                dqm->ops.destroy_queue = destroy_queue_nocpsch;
1910                dqm->ops.update_queue = update_queue;
1911                dqm->ops.register_process = register_process;
1912                dqm->ops.unregister_process = unregister_process;
1913                dqm->ops.initialize = initialize_nocpsch;
1914                dqm->ops.uninitialize = uninitialize;
1915                dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
1916                dqm->ops.process_termination = process_termination_nocpsch;
1917                dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
1918                dqm->ops.restore_process_queues =
1919                        restore_process_queues_nocpsch;
1920                dqm->ops.get_wave_state = get_wave_state;
1921                break;
1922        default:
1923                pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
1924                goto out_free;
1925        }
1926
1927        switch (dev->device_info->asic_family) {
1928        case CHIP_CARRIZO:
1929                device_queue_manager_init_vi(&dqm->asic_ops);
1930                break;
1931
1932        case CHIP_KAVERI:
1933                device_queue_manager_init_cik(&dqm->asic_ops);
1934                break;
1935
1936        case CHIP_HAWAII:
1937                device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
1938                break;
1939
1940        case CHIP_TONGA:
1941        case CHIP_FIJI:
1942        case CHIP_POLARIS10:
1943        case CHIP_POLARIS11:
1944        case CHIP_POLARIS12:
1945        case CHIP_VEGAM:
1946                device_queue_manager_init_vi_tonga(&dqm->asic_ops);
1947                break;
1948
1949        case CHIP_VEGA10:
1950        case CHIP_VEGA12:
1951        case CHIP_VEGA20:
1952        case CHIP_RAVEN:
1953        case CHIP_RENOIR:
1954        case CHIP_ARCTURUS:
1955        case CHIP_ALDEBARAN:
1956                device_queue_manager_init_v9(&dqm->asic_ops);
1957                break;
1958        case CHIP_NAVI10:
1959        case CHIP_NAVI12:
1960        case CHIP_NAVI14:
1961        case CHIP_SIENNA_CICHLID:
1962        case CHIP_NAVY_FLOUNDER:
1963        case CHIP_VANGOGH:
1964        case CHIP_DIMGREY_CAVEFISH:
1965        case CHIP_BEIGE_GOBY:
1966        case CHIP_YELLOW_CARP:
1967        case CHIP_CYAN_SKILLFISH:
1968                device_queue_manager_init_v10_navi10(&dqm->asic_ops);
1969                break;
1970        default:
1971                WARN(1, "Unexpected ASIC family %u",
1972                     dev->device_info->asic_family);
1973                goto out_free;
1974        }
1975
1976        if (init_mqd_managers(dqm))
1977                goto out_free;
1978
1979        if (allocate_hiq_sdma_mqd(dqm)) {
1980                pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
1981                goto out_free;
1982        }
1983
1984        if (!dqm->ops.initialize(dqm))
1985                return dqm;
1986
1987out_free:
1988        kfree(dqm);
1989        return NULL;
1990}
1991
1992static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
1993                                    struct kfd_mem_obj *mqd)
1994{
1995        WARN(!mqd, "No hiq sdma mqd trunk to free");
1996
1997        amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
1998}
1999
2000void device_queue_manager_uninit(struct device_queue_manager *dqm)
2001{
2002        dqm->ops.uninitialize(dqm);
2003        deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
2004        kfree(dqm);
2005}
2006
2007int kfd_process_vm_fault(struct device_queue_manager *dqm, u32 pasid)
2008{
2009        struct kfd_process_device *pdd;
2010        struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
2011        int ret = 0;
2012
2013        if (!p)
2014                return -EINVAL;
2015        WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
2016        pdd = kfd_get_process_device_data(dqm->dev, p);
2017        if (pdd)
2018                ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
2019        kfd_unref_process(p);
2020
2021        return ret;
2022}
2023
2024static void kfd_process_hw_exception(struct work_struct *work)
2025{
2026        struct device_queue_manager *dqm = container_of(work,
2027                        struct device_queue_manager, hw_exception_work);
2028        amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
2029}
2030
2031#if defined(CONFIG_DEBUG_FS)
2032
2033static void seq_reg_dump(struct seq_file *m,
2034                         uint32_t (*dump)[2], uint32_t n_regs)
2035{
2036        uint32_t i, count;
2037
2038        for (i = 0, count = 0; i < n_regs; i++) {
2039                if (count == 0 ||
2040                    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
2041                        seq_printf(m, "%s    %08x: %08x",
2042                                   i ? "\n" : "",
2043                                   dump[i][0], dump[i][1]);
2044                        count = 7;
2045                } else {
2046                        seq_printf(m, " %08x", dump[i][1]);
2047                        count--;
2048                }
2049        }
2050
2051        seq_puts(m, "\n");
2052}
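/*
 * Rough sample of the seq_reg_dump() output format (offsets and values
 * are made up): up to eight consecutive registers are packed per line,
 * and a gap in the register offsets starts a new line with its own
 * address prefix:
 *
 *	    00001000: 11111111 22222222 33333333 44444444
 *	    00001020: aaaaaaaa bbbbbbbb
 */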
2053
2054int dqm_debugfs_hqds(struct seq_file *m, void *data)
2055{
2056        struct device_queue_manager *dqm = data;
2057        uint32_t (*dump)[2], n_regs;
2058        int pipe, queue;
2059        int r = 0;
2060
2061        if (!dqm->sched_running) {
2062                seq_puts(m, " Device is stopped\n");
2063
2064                return 0;
2065        }
2066
2067        r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
2068                                        KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
2069                                        &dump, &n_regs);
2070        if (!r) {
2071                seq_printf(m, "  HIQ on MEC %d Pipe %d Queue %d\n",
2072                           KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
2073                           KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
2074                           KFD_CIK_HIQ_QUEUE);
2075                seq_reg_dump(m, dump, n_regs);
2076
2077                kfree(dump);
2078        }
2079
2080        for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
2081                int pipe_offset = pipe * get_queues_per_pipe(dqm);
2082
2083                for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
2084                        if (!test_bit(pipe_offset + queue,
2085                                      dqm->dev->shared_resources.cp_queue_bitmap))
2086                                continue;
2087
2088                        r = dqm->dev->kfd2kgd->hqd_dump(
2089                                dqm->dev->kgd, pipe, queue, &dump, &n_regs);
2090                        if (r)
2091                                break;
2092
2093                        seq_printf(m, "  CP Pipe %d, Queue %d\n",
2094                                  pipe, queue);
2095                        seq_reg_dump(m, dump, n_regs);
2096
2097                        kfree(dump);
2098                }
2099        }
2100
2101        for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
2102                for (queue = 0;
2103                     queue < dqm->dev->device_info->num_sdma_queues_per_engine;
2104                     queue++) {
2105                        r = dqm->dev->kfd2kgd->hqd_sdma_dump(
2106                                dqm->dev->kgd, pipe, queue, &dump, &n_regs);
2107                        if (r)
2108                                break;
2109
2110                        seq_printf(m, "  SDMA Engine %d, RLC %d\n",
2111                                  pipe, queue);
2112                        seq_reg_dump(m, dump, n_regs);
2113
2114                        kfree(dump);
2115                }
2116        }
2117
2118        return r;
2119}
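/*
 * dqm_debugfs_hqds() backs a debugfs entry (typically exposed as
 * /sys/kernel/debug/kfd/hqds, the exact path depending on how
 * kfd_debugfs registers it), so the HQD and SDMA RLC register dumps
 * above can be inspected from user space, e.g.:
 *
 *	# cat /sys/kernel/debug/kfd/hqds
 */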
2120
2121int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
2122{
2123        int r = 0;
2124
2125        dqm_lock(dqm);
2126        r = pm_debugfs_hang_hws(&dqm->packet_mgr);
2127        if (r) {
2128                dqm_unlock(dqm);
2129                return r;
2130        }
2131        dqm->active_runlist = true;
2132        r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
2133        dqm_unlock(dqm);
2134
2135        return r;
2136}
2137
2138#endif
2139