linux/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue: 1 << 11 = 2048 bytes */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			      dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

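/*
 * allocate_vmid - grab a free VMID for a process on the no-HWS path.
 *
 * dqm->vmid_bitmap holds one bit per VMID that KFD owns; on Kaveri the
 * hardware VMIDs reserved for KFD start at KFD_VMID_START_OFFSET (8), so
 * bit 0 maps to VMID 8. Called with dqm->lock held when the first queue
 * of a process is created. Allocation also installs the PASID<->VMID
 * mapping and programs the SH_MEM registers for the new VMID.
 */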
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);

	/* Kaveri KFD VMIDs start at VMID 8 */
	allocated_vmid = bit + KFD_VMID_START_OFFSET;
	pr_debug("kfd: vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	return 0;
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - KFD_VMID_START_OFFSET;

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

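/*
 * Queue creation for the no-HWS (non-CP-scheduling) path. The first
 * queue created for a process also allocates the process's VMID; all
 * bookkeeping (queue_count, sdma_queue_count, total_queue_count) is
 * updated under dqm->lock.
 */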
static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				int *allocated_vmid)
{
	int retval;

	BUG_ON(!dqm || !q || !qpd || !allocated_vmid);

	pr_debug("kfd: In func %s\n", __func__);
	print_queue(q);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval != 0) {
			mutex_unlock(&dqm->lock);
			return retval;
		}
	}
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		/* don't leave retval uninitialized for unknown queue types */
		retval = -EINVAL;

	if (retval != 0) {
		if (list_empty(&qpd->queues_list)) {
			deallocate_vmid(dqm, qpd, q);
			*allocated_vmid = 0;
		}
		mutex_unlock(&dqm->lock);
		return retval;
	}

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);
	return 0;
}

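/*
 * allocate_hqd - pick a hardware queue descriptor slot for @q.
 *
 * Scans the pipes round-robin starting at next_pipe_to_allocate and takes
 * the first free queue bit on the first enabled pipe that has one; the
 * starting pipe is then advanced so consecutive allocations spread
 * "horizontally" across pipes rather than filling one pipe first.
 */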
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				get_queues_per_pipe(dqm));

			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
				__func__, q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
}

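/*
 * Creating a compute queue without the CP scheduler is a three-step
 * sequence: reserve an HQD slot, initialize the MQD in GART memory, then
 * load the MQD directly onto the chosen pipe/queue. Each step unwinds
 * the previous ones on failure.
 */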
static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (mqd == NULL)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval != 0)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		return retval;
	}

	pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n",
			q->pipe, q->queue);

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
			q->queue, (uint32_t __user *) q->properties.write_ptr);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
		return retval;
	}

	return 0;
}

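/*
 * Destroying a queue on the no-HWS path preempts it directly via
 * destroy_mqd() (wavefront reset) before releasing the HQD or SDMA slot.
 * The process's VMID is released once its last queue is gone.
 */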
static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !q->mqd || !qpd);

	retval = 0;

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type is invalid (%d)\n",
				q->properties.type);
		retval = -EINVAL;
		goto out;
	}

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
				q->pipe, q->queue);

	if (retval != 0)
		goto out;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
	if (q->properties.is_active)
		dqm->queue_count--;

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool prev_active = false;

	BUG_ON(!dqm || !q || !q->mqd);

	mutex_lock(&dqm->lock);
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	if (q->properties.is_active)
		prev_active = true;

	/*
	 * check active state vs. the previous state
	 * and modify counter accordingly
	 */
	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
	if ((q->properties.is_active) && (!prev_active))
		dqm->queue_count++;
	else if ((!q->properties.is_active) && (prev_active))
		dqm->queue_count--;

	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = execute_queues_cpsch(dqm, false);

	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager_nocpsch(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);

	pr_debug("kfd: In func %s mqd type %d\n", __func__, type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (mqd == NULL)
			pr_err("kfd: mqd manager is NULL\n");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int register_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	int retval;

	BUG_ON(!dqm || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	retval = dqm->ops_asic_specific.register_process(dqm, qpd);

	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return retval;
}

static int unregister_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	BUG_ON(!dqm || !qpd);

	pr_debug("In func %s\n", __func__);

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	BUG_ON(dqm == NULL);

	for (i = 0; i < get_pipes_per_mec(dqm); i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

static int init_scheduler(struct device_queue_manager *dqm)
{
	int retval = 0;

	BUG_ON(!dqm);

	pr_debug("kfd: In %s\n", __func__);

	return retval;
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;
	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues) {
		mutex_destroy(&dqm->lock);
		return -ENOMEM;
	}

	for (i = 0; i < get_pipes_per_mec(dqm); i++)
		dqm->allocated_queues[i] = (1 << get_queues_per_pipe(dqm)) - 1;

	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	init_scheduler(dqm);
	return 0;
}

static void uninitialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0; i < KFD_MQD_TYPE_MAX; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

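/*
 * SDMA queues are tracked in dqm->sdma_bitmap, one bit per queue across
 * both SDMA engines. The allocated id is split into an engine id and a
 * per-engine queue id in create_sdma_queue_nocpsch() below; assuming the
 * CIK layout (2 engines, 2 queues each, per the CIK_SDMA_* defines), ids
 * 0-3 map to engine 0/1, queue 0/1.
 */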
static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
				CIK_SDMA_QUEUES);

	clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval != 0)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;

	pr_debug("kfd: sdma id is:    %d\n", q->sdma_id);
	pr_debug("     sdma queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("     sdma engine id: %d\n", q->properties.sdma_engine_id);

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_sdma_queue(dqm, q->sdma_id);
		return retval;
	}

	retval = mqd->load_mqd(mqd, q->mqd, 0, 0, NULL);
	if (retval != 0) {
		deallocate_sdma_queue(dqm, q->sdma_id);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
		return retval;
	}

	return 0;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

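/*
 * With HWS the CP firmware schedules queues itself; the driver only tells
 * it which VMIDs and HQDs it may use. For example, assuming
 * VMID_PER_DEVICE == 8 and KFD_VMID_START_OFFSET == 8 (Kaveri), the
 * vmid_mask computed below is ((1 << 8) - 1) << 8 == 0xFF00, i.e.
 * VMIDs 8-15.
 */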
static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s\n", __func__);

	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
	res.vmid_mask <<= KFD_VMID_START_OFFSET;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating.
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask) * 8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
						res.gds_heap_size = 0;

	pr_debug("kfd: scheduling resources:\n"
			"      vmid mask: 0x%8X\n"
			"      queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	retval = dqm->ops_asic_specific.initialize(dqm);
	if (retval != 0)
		goto fail_init_pipelines;

	return 0;

fail_init_pipelines:
	mutex_destroy(&dqm->lock);
	return retval;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	int retval;

	BUG_ON(!dqm);

	retval = pm_init(&dqm->packets, dqm);
	if (retval != 0)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval != 0)
		goto fail_set_sched_resources;

	pr_debug("kfd: allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval != 0)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	list_for_each_entry(node, &dqm->queues, list)
		if (node->qpd->pqm->process && dqm->dev)
			kfd_bind_process_to_device(dqm->dev,
						node->qpd->pqm->process);

	execute_queues_cpsch(dqm, true);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	destroy_queues_cpsch(dqm, true, true);

	list_for_each_entry(node, &dqm->queues, list) {
		pdd = qpd_to_pdd(node->qpd);
		pdd->bound = false;
	}
	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq);

	pr_debug("kfd: In %s\n", __func__);

	mutex_lock(&dqm->lock);
	/* here we actually preempt the DIQ */
	destroy_queues_cpsch(dqm, true, false);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, false);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

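/*
 * Under HWS the SDMA engine is picked here with a trivial round-robin
 * between the two engines; the CP scheduler takes care of placing the
 * queue on the engine.
 */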
static void select_sdma_engine_id(struct queue *q)
{
	static int sdma_id;

	q->sdma_id = sdma_id;
	sdma_id = (sdma_id + 1) % 2;
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	retval = 0;

	if (allocate_vmid)
		*allocate_vmid = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		select_sdma_engine_id(q);

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm, false);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

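/*
 * Fence handshake used by destroy_queues_cpsch(): the driver writes
 * KFD_FENCE_INIT to fence_addr, asks the CP to write KFD_FENCE_COMPLETED
 * there once preemption finishes (pm_send_query_status), then busy-polls
 * the location below, yielding with schedule() between reads.
 */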
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned long timeout)
{
	BUG_ON(!fence_addr);
	timeout += jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, timeout)) {
			pr_err("kfd: qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int destroy_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock)
{
	int retval;
	enum kfd_preempt_type_filter preempt_type;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	retval = 0;

	if (lock)
		mutex_lock(&dqm->lock);
	if (!dqm->active_runlist)
		goto out;

	pr_debug("kfd: Before destroying queues, sdma queue count is %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		destroy_sdma_queues(dqm, 0);
		destroy_sdma_queues(dqm, 1);
	}

	preempt_type = preempt_static_queues ?
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			preempt_type, 0, false, 0);
	if (retval != 0)
		goto out;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* wait for the fence; this may time out if preemption got stuck */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval != 0) {
		pdd = kfd_get_process_device_data(dqm->dev,
				kfd_get_process(current));
		pdd->reset_wavefronts = true;
		goto out;
	}
	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	BUG_ON(!dqm);

	if (lock)
		mutex_lock(&dqm->lock);

	retval = destroy_queues_cpsch(dqm, false, false);
	if (retval != 0) {
		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queue preemption\n");
		goto out;
	}

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
		retval = 0;
		goto out;
	}

	if (dqm->active_runlist) {
		retval = 0;
		goto out;
	}

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval != 0) {
		pr_err("kfd: failed to execute runlist\n");
		goto out;
	}
	dqm->active_runlist = true;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !qpd || !q);

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	if (qpd->is_debug) {
		/*
		 * error: destroying a queue that belongs to a process
		 * currently being debugged is not allowed
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;
	}

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count--;

	list_del(&q->list);
	if (q->properties.is_active)
		dqm->queue_count--;

	execute_queues_cpsch(dqm, false);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return 0;

failed:
failed_try_destroy_debugged_queue:

	mutex_unlock(&dqm->lock);
	return retval;
}

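/*
 * Worked example of the APE1 encoding checked below (illustrative values
 * only): base = 0x200000000 with size = 0x10000 gives
 * limit = 0x20000FFFF; base & APE1_FIXED_BITS_MASK == 0 and
 * limit & APE1_FIXED_BITS_MASK == APE1_LIMIT_ALIGNMENT, so the pair is
 * accepted and stored as sh_mem_ape1_base = 0x20000 and
 * sh_mem_ape1_limit = 0x20000 (the registers hold bits [47:16]).
 */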
/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval;

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base)
			goto out;

		if ((base & APE1_FIXED_BITS_MASK) != 0)
			goto out;

		if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
			goto out;

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->ops_asic_specific.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

	mutex_unlock(&dqm->lock);
	return retval;

out:
	mutex_unlock(&dqm->lock);
	return false;
}

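/*
 * device_queue_manager_init() picks the ops table from the module's
 * sched_policy (both HWS variants share the cpsch entry points, NO_HWS
 * uses the nocpsch ones) and then layers the ASIC-specific callbacks on
 * top for Kaveri (CIK) or Carrizo (VI).
 */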
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	BUG_ON(!dev);

	pr_debug("kfd: loading device queue manager\n");

	dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
	if (!dqm)
		return NULL;

	dqm->dev = dev;
	switch (sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	default:
		BUG();
		break;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->ops_asic_specific);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->ops_asic_specific);
		break;
	}

	if (dqm->ops.initialize(dqm) != 0) {
		kfree(dqm);
		return NULL;
	}

	return dqm;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);

	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}