linux/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 */
  22
  23#include <linux/device.h>
  24#include <linux/export.h>
  25#include <linux/err.h>
  26#include <linux/fs.h>
  27#include <linux/file.h>
  28#include <linux/sched.h>
  29#include <linux/slab.h>
  30#include <linux/uaccess.h>
  31#include <linux/compat.h>
  32#include <uapi/linux/kfd_ioctl.h>
  33#include <linux/time.h>
  34#include <linux/mm.h>
  35#include <linux/mman.h>
  36#include <linux/dma-buf.h>
  37#include <asm/processor.h>
  38#include "kfd_priv.h"
  39#include "kfd_device_queue_manager.h"
  40#include "kfd_dbgmgr.h"
  41#include "kfd_svm.h"
  42#include "amdgpu_amdkfd.h"
  43#include "kfd_smi_events.h"
  44
  45static long kfd_ioctl(struct file *, unsigned int, unsigned long);
  46static int kfd_open(struct inode *, struct file *);
  47static int kfd_release(struct inode *, struct file *);
  48static int kfd_mmap(struct file *, struct vm_area_struct *);
  49
  50static const char kfd_dev_name[] = "kfd";
  51
  52static const struct file_operations kfd_fops = {
  53        .owner = THIS_MODULE,
  54        .unlocked_ioctl = kfd_ioctl,
  55        .compat_ioctl = compat_ptr_ioctl,
  56        .open = kfd_open,
  57        .release = kfd_release,
  58        .mmap = kfd_mmap,
  59};
  60
  61static int kfd_char_dev_major = -1;
  62static struct class *kfd_class;
  63struct device *kfd_device;
  64
  65int kfd_chardev_init(void)
  66{
  67        int err = 0;
  68
  69        kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
  70        err = kfd_char_dev_major;
  71        if (err < 0)
  72                goto err_register_chrdev;
  73
  74        kfd_class = class_create(THIS_MODULE, kfd_dev_name);
  75        err = PTR_ERR(kfd_class);
  76        if (IS_ERR(kfd_class))
  77                goto err_class_create;
  78
  79        kfd_device = device_create(kfd_class, NULL,
  80                                        MKDEV(kfd_char_dev_major, 0),
  81                                        NULL, kfd_dev_name);
  82        err = PTR_ERR(kfd_device);
  83        if (IS_ERR(kfd_device))
  84                goto err_device_create;
  85
  86        return 0;
  87
  88err_device_create:
  89        class_destroy(kfd_class);
  90err_class_create:
  91        unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
  92err_register_chrdev:
  93        return err;
  94}
  95
  96void kfd_chardev_exit(void)
  97{
  98        device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
  99        class_destroy(kfd_class);
 100        unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
 101        kfd_device = NULL;
 102}
 103
 104struct device *kfd_chardev(void)
 105{
 106        return kfd_device;
 107}
 108
 109
 110static int kfd_open(struct inode *inode, struct file *filep)
 111{
 112        struct kfd_process *process;
 113        bool is_32bit_user_mode;
 114
 115        if (iminor(inode) != 0)
 116                return -ENODEV;
 117
 118        is_32bit_user_mode = in_compat_syscall();
 119
 120        if (is_32bit_user_mode) {
 121                dev_warn(kfd_device,
 122                        "Process %d (32-bit) failed to open /dev/kfd\n"
 123                        "32-bit processes are not supported by amdkfd\n",
 124                        current->pid);
 125                return -EPERM;
 126        }
 127
 128        process = kfd_create_process(filep);
 129        if (IS_ERR(process))
 130                return PTR_ERR(process);
 131
 132        if (kfd_is_locked()) {
 133                dev_dbg(kfd_device, "kfd is locked!\n"
 134                                "process %d unreferenced", process->pasid);
 135                kfd_unref_process(process);
 136                return -EAGAIN;
 137        }
 138
 139        /* filep now owns the reference returned by kfd_create_process */
 140        filep->private_data = process;
 141
 142        dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
 143                process->pasid, process->is_32bit_user_mode);
 144
 145        return 0;
 146}
 147
 148static int kfd_release(struct inode *inode, struct file *filep)
 149{
 150        struct kfd_process *process = filep->private_data;
 151
 152        if (process)
 153                kfd_unref_process(process);
 154
 155        return 0;
 156}
 157
 158static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
 159                                        void *data)
 160{
 161        struct kfd_ioctl_get_version_args *args = data;
 162
 163        args->major_version = KFD_IOCTL_MAJOR_VERSION;
 164        args->minor_version = KFD_IOCTL_MINOR_VERSION;
 165
 166        return 0;
 167}
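
/*
 * Illustrative userspace sketch (not part of this file): the version query
 * above is typically the first ioctl a runtime issues after opening the
 * device node.  Assuming the AMDKFD_IOC_GET_VERSION request and the
 * kfd_ioctl_get_version_args layout from the uapi header, a caller might
 * do roughly:
 *
 *	struct kfd_ioctl_get_version_args v = {0};
 *	int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);
 *
 *	if (fd >= 0 && ioctl(fd, AMDKFD_IOC_GET_VERSION, &v) == 0)
 *		printf("KFD ioctl interface %u.%u\n",
 *		       v.major_version, v.minor_version);
 */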
 168
 169static int set_queue_properties_from_user(struct queue_properties *q_properties,
 170                                struct kfd_ioctl_create_queue_args *args)
 171{
 172        if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
  173                pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
 174                return -EINVAL;
 175        }
 176
 177        if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
  178                pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
 179                return -EINVAL;
 180        }
 181
 182        if ((args->ring_base_address) &&
 183                (!access_ok((const void __user *) args->ring_base_address,
 184                        sizeof(uint64_t)))) {
 185                pr_err("Can't access ring base address\n");
 186                return -EFAULT;
 187        }
 188
 189        if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
 190                pr_err("Ring size must be a power of 2 or 0\n");
 191                return -EINVAL;
 192        }
 193
 194        if (!access_ok((const void __user *) args->read_pointer_address,
 195                        sizeof(uint32_t))) {
 196                pr_err("Can't access read pointer\n");
 197                return -EFAULT;
 198        }
 199
 200        if (!access_ok((const void __user *) args->write_pointer_address,
 201                        sizeof(uint32_t))) {
 202                pr_err("Can't access write pointer\n");
 203                return -EFAULT;
 204        }
 205
 206        if (args->eop_buffer_address &&
 207                !access_ok((const void __user *) args->eop_buffer_address,
 208                        sizeof(uint32_t))) {
 209                pr_debug("Can't access eop buffer");
 210                return -EFAULT;
 211        }
 212
 213        if (args->ctx_save_restore_address &&
 214                !access_ok((const void __user *) args->ctx_save_restore_address,
 215                        sizeof(uint32_t))) {
 216                pr_debug("Can't access ctx save restore buffer");
 217                return -EFAULT;
 218        }
 219
 220        q_properties->is_interop = false;
 221        q_properties->is_gws = false;
 222        q_properties->queue_percent = args->queue_percentage;
 223        q_properties->priority = args->queue_priority;
 224        q_properties->queue_address = args->ring_base_address;
 225        q_properties->queue_size = args->ring_size;
 226        q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
 227        q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
 228        q_properties->eop_ring_buffer_address = args->eop_buffer_address;
 229        q_properties->eop_ring_buffer_size = args->eop_buffer_size;
 230        q_properties->ctx_save_restore_area_address =
 231                        args->ctx_save_restore_address;
 232        q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
 233        q_properties->ctl_stack_size = args->ctl_stack_size;
 234        if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
 235                args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
 236                q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
 237        else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
 238                q_properties->type = KFD_QUEUE_TYPE_SDMA;
 239        else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI)
 240                q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI;
 241        else
 242                return -ENOTSUPP;
 243
 244        if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
 245                q_properties->format = KFD_QUEUE_FORMAT_AQL;
 246        else
 247                q_properties->format = KFD_QUEUE_FORMAT_PM4;
 248
 249        pr_debug("Queue Percentage: %d, %d\n",
 250                        q_properties->queue_percent, args->queue_percentage);
 251
 252        pr_debug("Queue Priority: %d, %d\n",
 253                        q_properties->priority, args->queue_priority);
 254
 255        pr_debug("Queue Address: 0x%llX, 0x%llX\n",
 256                        q_properties->queue_address, args->ring_base_address);
 257
 258        pr_debug("Queue Size: 0x%llX, %u\n",
 259                        q_properties->queue_size, args->ring_size);
 260
 261        pr_debug("Queue r/w Pointers: %px, %px\n",
 262                        q_properties->read_ptr,
 263                        q_properties->write_ptr);
 264
 265        pr_debug("Queue Format: %d\n", q_properties->format);
 266
 267        pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
 268
 269        pr_debug("Queue CTX save area: 0x%llX\n",
 270                        q_properties->ctx_save_restore_area_address);
 271
 272        return 0;
 273}
 274
 275static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 276                                        void *data)
 277{
 278        struct kfd_ioctl_create_queue_args *args = data;
 279        struct kfd_dev *dev;
 280        int err = 0;
 281        unsigned int queue_id;
 282        struct kfd_process_device *pdd;
 283        struct queue_properties q_properties;
 284        uint32_t doorbell_offset_in_process = 0;
 285
 286        memset(&q_properties, 0, sizeof(struct queue_properties));
 287
 288        pr_debug("Creating queue ioctl\n");
 289
 290        err = set_queue_properties_from_user(&q_properties, args);
 291        if (err)
 292                return err;
 293
 294        pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
 295        dev = kfd_device_by_id(args->gpu_id);
 296        if (!dev) {
 297                pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
 298                return -EINVAL;
 299        }
 300
 301        mutex_lock(&p->mutex);
 302
 303        pdd = kfd_bind_process_to_device(dev, p);
 304        if (IS_ERR(pdd)) {
 305                err = -ESRCH;
 306                goto err_bind_process;
 307        }
 308
 309        pr_debug("Creating queue for PASID 0x%x on gpu 0x%x\n",
 310                        p->pasid,
 311                        dev->id);
 312
 313        err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id,
 314                        &doorbell_offset_in_process);
 315        if (err != 0)
 316                goto err_create_queue;
 317
 318        args->queue_id = queue_id;
 319
 320
 321        /* Return gpu_id as doorbell offset for mmap usage */
 322        args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
 323        args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
 324        if (KFD_IS_SOC15(dev->device_info->asic_family))
 325                /* On SOC15 ASICs, include the doorbell offset within the
 326                 * process doorbell frame, which is 2 pages.
 327                 */
 328                args->doorbell_offset |= doorbell_offset_in_process;
 329
 330        mutex_unlock(&p->mutex);
 331
 332        pr_debug("Queue id %d was created successfully\n", args->queue_id);
 333
 334        pr_debug("Ring buffer address == 0x%016llX\n",
 335                        args->ring_base_address);
 336
 337        pr_debug("Read ptr address    == 0x%016llX\n",
 338                        args->read_pointer_address);
 339
 340        pr_debug("Write ptr address   == 0x%016llX\n",
 341                        args->write_pointer_address);
 342
 343        return 0;
 344
 345err_create_queue:
 346err_bind_process:
 347        mutex_unlock(&p->mutex);
 348        return err;
 349}
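
/*
 * Illustrative sketch (not part of this file): the doorbell_offset filled
 * in above is intended to be used as the mmap offset on the /dev/kfd file
 * descriptor, so that userspace can map the doorbell page for the new
 * queue.  Assuming kfd_fd is the open /dev/kfd descriptor, args is the
 * kfd_ioctl_create_queue_args returned by this ioctl and page_size is the
 * system page size, the mapping would look roughly like:
 *
 *	void *doorbell = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
 *			      MAP_SHARED, kfd_fd, args.doorbell_offset);
 */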
 350
 351static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
 352                                        void *data)
 353{
 354        int retval;
 355        struct kfd_ioctl_destroy_queue_args *args = data;
 356
 357        pr_debug("Destroying queue id %d for pasid 0x%x\n",
 358                                args->queue_id,
 359                                p->pasid);
 360
 361        mutex_lock(&p->mutex);
 362
 363        retval = pqm_destroy_queue(&p->pqm, args->queue_id);
 364
 365        mutex_unlock(&p->mutex);
 366        return retval;
 367}
 368
 369static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
 370                                        void *data)
 371{
 372        int retval;
 373        struct kfd_ioctl_update_queue_args *args = data;
 374        struct queue_properties properties;
 375
 376        if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
  377                pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
 378                return -EINVAL;
 379        }
 380
 381        if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
  382                pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
 383                return -EINVAL;
 384        }
 385
 386        if ((args->ring_base_address) &&
 387                (!access_ok((const void __user *) args->ring_base_address,
 388                        sizeof(uint64_t)))) {
 389                pr_err("Can't access ring base address\n");
 390                return -EFAULT;
 391        }
 392
 393        if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
 394                pr_err("Ring size must be a power of 2 or 0\n");
 395                return -EINVAL;
 396        }
 397
 398        properties.queue_address = args->ring_base_address;
 399        properties.queue_size = args->ring_size;
 400        properties.queue_percent = args->queue_percentage;
 401        properties.priority = args->queue_priority;
 402
 403        pr_debug("Updating queue id %d for pasid 0x%x\n",
 404                        args->queue_id, p->pasid);
 405
 406        mutex_lock(&p->mutex);
 407
 408        retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
 409
 410        mutex_unlock(&p->mutex);
 411
 412        return retval;
 413}
 414
 415static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
 416                                        void *data)
 417{
 418        int retval;
 419        const int max_num_cus = 1024;
 420        struct kfd_ioctl_set_cu_mask_args *args = data;
 421        struct queue_properties properties;
 422        uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
 423        size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
 424
 425        if ((args->num_cu_mask % 32) != 0) {
 426                pr_debug("num_cu_mask 0x%x must be a multiple of 32",
 427                                args->num_cu_mask);
 428                return -EINVAL;
 429        }
 430
 431        properties.cu_mask_count = args->num_cu_mask;
 432        if (properties.cu_mask_count == 0) {
 433                pr_debug("CU mask cannot be 0");
 434                return -EINVAL;
 435        }
 436
  437        /* To prevent an unreasonably large CU mask size, set an arbitrary
  438         * limit of max_num_cus bits.  Any CU mask bits past max_num_cus are
  439         * dropped and only the first max_num_cus bits are used.
  440         */
 441        if (properties.cu_mask_count > max_num_cus) {
 442                pr_debug("CU mask cannot be greater than 1024 bits");
 443                properties.cu_mask_count = max_num_cus;
 444                cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
 445        }
 446
 447        properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
 448        if (!properties.cu_mask)
 449                return -ENOMEM;
 450
 451        retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
 452        if (retval) {
 453                pr_debug("Could not copy CU mask from userspace");
 454                kfree(properties.cu_mask);
 455                return -EFAULT;
 456        }
 457
 458        mutex_lock(&p->mutex);
 459
 460        retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);
 461
 462        mutex_unlock(&p->mutex);
 463
 464        if (retval)
 465                kfree(properties.cu_mask);
 466
 467        return retval;
 468}
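
/*
 * Note on the CU mask layout, derived from the checks above: userspace
 * passes the mask as an array of 32-bit words, num_cu_mask bits in total
 * (a multiple of 32, capped at max_num_cus).  Each bit enables one compute
 * unit (the exact CU ordering is hardware specific), so e.g. num_cu_mask =
 * 64 with words { 0xffffffff, 0x00000000 } would enable only the CUs
 * selected by the first word.
 */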
 469
 470static int kfd_ioctl_get_queue_wave_state(struct file *filep,
 471                                          struct kfd_process *p, void *data)
 472{
 473        struct kfd_ioctl_get_queue_wave_state_args *args = data;
 474        int r;
 475
 476        mutex_lock(&p->mutex);
 477
 478        r = pqm_get_wave_state(&p->pqm, args->queue_id,
 479                               (void __user *)args->ctl_stack_address,
 480                               &args->ctl_stack_used_size,
 481                               &args->save_area_used_size);
 482
 483        mutex_unlock(&p->mutex);
 484
 485        return r;
 486}
 487
 488static int kfd_ioctl_set_memory_policy(struct file *filep,
 489                                        struct kfd_process *p, void *data)
 490{
 491        struct kfd_ioctl_set_memory_policy_args *args = data;
 492        struct kfd_dev *dev;
 493        int err = 0;
 494        struct kfd_process_device *pdd;
 495        enum cache_policy default_policy, alternate_policy;
 496
 497        if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
 498            && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
 499                return -EINVAL;
 500        }
 501
 502        if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
 503            && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
 504                return -EINVAL;
 505        }
 506
 507        dev = kfd_device_by_id(args->gpu_id);
 508        if (!dev)
 509                return -EINVAL;
 510
 511        mutex_lock(&p->mutex);
 512
 513        pdd = kfd_bind_process_to_device(dev, p);
 514        if (IS_ERR(pdd)) {
 515                err = -ESRCH;
 516                goto out;
 517        }
 518
 519        default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
 520                         ? cache_policy_coherent : cache_policy_noncoherent;
 521
 522        alternate_policy =
 523                (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
 524                   ? cache_policy_coherent : cache_policy_noncoherent;
 525
 526        if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
 527                                &pdd->qpd,
 528                                default_policy,
 529                                alternate_policy,
 530                                (void __user *)args->alternate_aperture_base,
 531                                args->alternate_aperture_size))
 532                err = -EINVAL;
 533
 534out:
 535        mutex_unlock(&p->mutex);
 536
 537        return err;
 538}
 539
 540static int kfd_ioctl_set_trap_handler(struct file *filep,
 541                                        struct kfd_process *p, void *data)
 542{
 543        struct kfd_ioctl_set_trap_handler_args *args = data;
 544        struct kfd_dev *dev;
 545        int err = 0;
 546        struct kfd_process_device *pdd;
 547
 548        dev = kfd_device_by_id(args->gpu_id);
 549        if (!dev)
 550                return -EINVAL;
 551
 552        mutex_lock(&p->mutex);
 553
 554        pdd = kfd_bind_process_to_device(dev, p);
 555        if (IS_ERR(pdd)) {
 556                err = -ESRCH;
 557                goto out;
 558        }
 559
 560        kfd_process_set_trap_handler(&pdd->qpd, args->tba_addr, args->tma_addr);
 561
 562out:
 563        mutex_unlock(&p->mutex);
 564
 565        return err;
 566}
 567
 568static int kfd_ioctl_dbg_register(struct file *filep,
 569                                struct kfd_process *p, void *data)
 570{
 571        struct kfd_ioctl_dbg_register_args *args = data;
 572        struct kfd_dev *dev;
 573        struct kfd_dbgmgr *dbgmgr_ptr;
 574        struct kfd_process_device *pdd;
 575        bool create_ok;
 576        long status = 0;
 577
 578        dev = kfd_device_by_id(args->gpu_id);
 579        if (!dev)
 580                return -EINVAL;
 581
 582        if (dev->device_info->asic_family == CHIP_CARRIZO) {
 583                pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
 584                return -EINVAL;
 585        }
 586
 587        mutex_lock(&p->mutex);
 588        mutex_lock(kfd_get_dbgmgr_mutex());
 589
  590        /*
  591         * Make sure that we have a pdd, in case this is the first queue
  592         * created for this process.
  593         */
 594        pdd = kfd_bind_process_to_device(dev, p);
 595        if (IS_ERR(pdd)) {
 596                status = PTR_ERR(pdd);
 597                goto out;
 598        }
 599
 600        if (!dev->dbgmgr) {
 601                /* In case of a legal call, we have no dbgmgr yet */
 602                create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
 603                if (create_ok) {
 604                        status = kfd_dbgmgr_register(dbgmgr_ptr, p);
 605                        if (status != 0)
 606                                kfd_dbgmgr_destroy(dbgmgr_ptr);
 607                        else
 608                                dev->dbgmgr = dbgmgr_ptr;
 609                }
 610        } else {
 611                pr_debug("debugger already registered\n");
 612                status = -EINVAL;
 613        }
 614
 615out:
 616        mutex_unlock(kfd_get_dbgmgr_mutex());
 617        mutex_unlock(&p->mutex);
 618
 619        return status;
 620}
 621
 622static int kfd_ioctl_dbg_unregister(struct file *filep,
 623                                struct kfd_process *p, void *data)
 624{
 625        struct kfd_ioctl_dbg_unregister_args *args = data;
 626        struct kfd_dev *dev;
 627        long status;
 628
 629        dev = kfd_device_by_id(args->gpu_id);
 630        if (!dev || !dev->dbgmgr)
 631                return -EINVAL;
 632
 633        if (dev->device_info->asic_family == CHIP_CARRIZO) {
 634                pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
 635                return -EINVAL;
 636        }
 637
 638        mutex_lock(kfd_get_dbgmgr_mutex());
 639
 640        status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
 641        if (!status) {
 642                kfd_dbgmgr_destroy(dev->dbgmgr);
 643                dev->dbgmgr = NULL;
 644        }
 645
 646        mutex_unlock(kfd_get_dbgmgr_mutex());
 647
 648        return status;
 649}
 650
  651/*
  652 * Parse and generate a variable-size data structure for address watch.
  653 * The total buffer size and the number of watch points are limited in
  654 * order to prevent kernel abuse (this has no bearing on the much smaller
  655 * HW limitation, which is enforced by the dbgdev module).
  656 * Please also note that the watch addresses themselves are not copied
  657 * from user separately, since they are programmed into the HW as
  658 * user-mode values.
  659 */
 660static int kfd_ioctl_dbg_address_watch(struct file *filep,
 661                                        struct kfd_process *p, void *data)
 662{
 663        struct kfd_ioctl_dbg_address_watch_args *args = data;
 664        struct kfd_dev *dev;
 665        struct dbg_address_watch_info aw_info;
 666        unsigned char *args_buff;
 667        long status;
 668        void __user *cmd_from_user;
 669        uint64_t watch_mask_value = 0;
 670        unsigned int args_idx = 0;
 671
 672        memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
 673
 674        dev = kfd_device_by_id(args->gpu_id);
 675        if (!dev)
 676                return -EINVAL;
 677
 678        if (dev->device_info->asic_family == CHIP_CARRIZO) {
 679                pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
 680                return -EINVAL;
 681        }
 682
 683        cmd_from_user = (void __user *) args->content_ptr;
 684
 685        /* Validate arguments */
 686
 687        if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
 688                (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
 689                (cmd_from_user == NULL))
 690                return -EINVAL;
 691
 692        /* this is the actual buffer to work with */
 693        args_buff = memdup_user(cmd_from_user,
 694                                args->buf_size_in_bytes - sizeof(*args));
 695        if (IS_ERR(args_buff))
 696                return PTR_ERR(args_buff);
 697
 698        aw_info.process = p;
 699
 700        aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
 701        args_idx += sizeof(aw_info.num_watch_points);
 702
 703        aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
 704        args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
 705
 706        /*
 707         * set watch address base pointer to point on the array base
 708         * within args_buff
 709         */
 710        aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
 711
 712        /* skip over the addresses buffer */
 713        args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
 714
 715        if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
 716                status = -EINVAL;
 717                goto out;
 718        }
 719
 720        watch_mask_value = (uint64_t) args_buff[args_idx];
 721
 722        if (watch_mask_value > 0) {
 723                /*
 724                 * There is an array of masks.
 725                 * set watch mask base pointer to point on the array base
 726                 * within args_buff
 727                 */
 728                aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
 729
 730                /* skip over the masks buffer */
 731                args_idx += sizeof(aw_info.watch_mask) *
 732                                aw_info.num_watch_points;
 733        } else {
 734                /* just the NULL mask, set to NULL and skip over it */
 735                aw_info.watch_mask = NULL;
 736                args_idx += sizeof(aw_info.watch_mask);
 737        }
 738
  739        if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
 740                status = -EINVAL;
 741                goto out;
 742        }
 743
 744        /* Currently HSA Event is not supported for DBG */
 745        aw_info.watch_event = NULL;
 746
 747        mutex_lock(kfd_get_dbgmgr_mutex());
 748
 749        status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
 750
 751        mutex_unlock(kfd_get_dbgmgr_mutex());
 752
 753out:
 754        kfree(args_buff);
 755
 756        return status;
 757}
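
/*
 * For reference, the variable-size content buffer parsed above is expected
 * to be laid out roughly as:
 *
 *	uint32_t                 num_watch_points;
 *	enum HSA_DBG_WATCH_MODE  watch_mode[num_watch_points];
 *	uint64_t                 watch_address[num_watch_points];
 *	uint64_t                 watch_mask[num_watch_points];
 *
 * where the mask array may be replaced by a single zero value when no
 * masks are supplied.
 */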
 758
 759/* Parse and generate fixed size data structure for wave control */
 760static int kfd_ioctl_dbg_wave_control(struct file *filep,
 761                                        struct kfd_process *p, void *data)
 762{
 763        struct kfd_ioctl_dbg_wave_control_args *args = data;
 764        struct kfd_dev *dev;
 765        struct dbg_wave_control_info wac_info;
 766        unsigned char *args_buff;
 767        uint32_t computed_buff_size;
 768        long status;
 769        void __user *cmd_from_user;
 770        unsigned int args_idx = 0;
 771
 772        memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
 773
 774        /* we use compact form, independent of the packing attribute value */
 775        computed_buff_size = sizeof(*args) +
 776                                sizeof(wac_info.mode) +
 777                                sizeof(wac_info.operand) +
 778                                sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
 779                                sizeof(wac_info.dbgWave_msg.MemoryVA) +
 780                                sizeof(wac_info.trapId);
 781
 782        dev = kfd_device_by_id(args->gpu_id);
 783        if (!dev)
 784                return -EINVAL;
 785
 786        if (dev->device_info->asic_family == CHIP_CARRIZO) {
 787                pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
 788                return -EINVAL;
 789        }
 790
 791        /* input size must match the computed "compact" size */
 792        if (args->buf_size_in_bytes != computed_buff_size) {
 793                pr_debug("size mismatch, computed : actual %u : %u\n",
 794                                args->buf_size_in_bytes, computed_buff_size);
 795                return -EINVAL;
 796        }
 797
 798        cmd_from_user = (void __user *) args->content_ptr;
 799
 800        if (cmd_from_user == NULL)
 801                return -EINVAL;
 802
 803        /* copy the entire buffer from user */
 804
 805        args_buff = memdup_user(cmd_from_user,
 806                                args->buf_size_in_bytes - sizeof(*args));
 807        if (IS_ERR(args_buff))
 808                return PTR_ERR(args_buff);
 809
  810        /* move ptr to the start of the "payload" area */
 811        wac_info.process = p;
 812
 813        wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
 814        args_idx += sizeof(wac_info.operand);
 815
 816        wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
 817        args_idx += sizeof(wac_info.mode);
 818
 819        wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
 820        args_idx += sizeof(wac_info.trapId);
 821
 822        wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
 823                                        *((uint32_t *)(&args_buff[args_idx]));
 824        wac_info.dbgWave_msg.MemoryVA = NULL;
 825
 826        mutex_lock(kfd_get_dbgmgr_mutex());
 827
 828        pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
 829                        wac_info.process, wac_info.operand,
 830                        wac_info.mode, wac_info.trapId,
 831                        wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
 832
 833        status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
 834
 835        pr_debug("Returned status of dbg manager is %ld\n", status);
 836
 837        mutex_unlock(kfd_get_dbgmgr_mutex());
 838
 839        kfree(args_buff);
 840
 841        return status;
 842}
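
/*
 * For reference, the fixed-size "compact" payload parsed above carries, in
 * order: the wave operand (enum HSA_DBG_WAVEOP), the wave mode (enum
 * HSA_DBG_WAVEMODE), the trap id (uint32_t) and the DbgWaveMsg
 * WaveMsgInfoGen2 value (uint32_t).  The MemoryVA slot is accounted for in
 * computed_buff_size but is not read; the kernel forces it to NULL.  This
 * is why args->buf_size_in_bytes must exactly match computed_buff_size.
 */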
 843
 844static int kfd_ioctl_get_clock_counters(struct file *filep,
 845                                struct kfd_process *p, void *data)
 846{
 847        struct kfd_ioctl_get_clock_counters_args *args = data;
 848        struct kfd_dev *dev;
 849
 850        dev = kfd_device_by_id(args->gpu_id);
 851        if (dev)
 852                /* Reading GPU clock counter from KGD */
 853                args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
 854        else
 855                /* Node without GPU resource */
 856                args->gpu_clock_counter = 0;
 857
 858        /* No access to rdtsc. Using raw monotonic time */
 859        args->cpu_clock_counter = ktime_get_raw_ns();
 860        args->system_clock_counter = ktime_get_boottime_ns();
 861
  862        /* Since the counter is in nanoseconds, we use a 1 GHz frequency */
 863        args->system_clock_freq = 1000000000;
 864
 865        return 0;
 866}
 867
 868
 869static int kfd_ioctl_get_process_apertures(struct file *filp,
 870                                struct kfd_process *p, void *data)
 871{
 872        struct kfd_ioctl_get_process_apertures_args *args = data;
 873        struct kfd_process_device_apertures *pAperture;
 874        int i;
 875
 876        dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
 877
 878        args->num_of_nodes = 0;
 879
 880        mutex_lock(&p->mutex);
 881        /* Run over all pdd of the process */
 882        for (i = 0; i < p->n_pdds; i++) {
 883                struct kfd_process_device *pdd = p->pdds[i];
 884
 885                pAperture =
 886                        &args->process_apertures[args->num_of_nodes];
 887                pAperture->gpu_id = pdd->dev->id;
 888                pAperture->lds_base = pdd->lds_base;
 889                pAperture->lds_limit = pdd->lds_limit;
 890                pAperture->gpuvm_base = pdd->gpuvm_base;
 891                pAperture->gpuvm_limit = pdd->gpuvm_limit;
 892                pAperture->scratch_base = pdd->scratch_base;
 893                pAperture->scratch_limit = pdd->scratch_limit;
 894
 895                dev_dbg(kfd_device,
 896                        "node id %u\n", args->num_of_nodes);
 897                dev_dbg(kfd_device,
 898                        "gpu id %u\n", pdd->dev->id);
 899                dev_dbg(kfd_device,
 900                        "lds_base %llX\n", pdd->lds_base);
 901                dev_dbg(kfd_device,
 902                        "lds_limit %llX\n", pdd->lds_limit);
 903                dev_dbg(kfd_device,
 904                        "gpuvm_base %llX\n", pdd->gpuvm_base);
 905                dev_dbg(kfd_device,
 906                        "gpuvm_limit %llX\n", pdd->gpuvm_limit);
 907                dev_dbg(kfd_device,
 908                        "scratch_base %llX\n", pdd->scratch_base);
 909                dev_dbg(kfd_device,
 910                        "scratch_limit %llX\n", pdd->scratch_limit);
 911
 912                if (++args->num_of_nodes >= NUM_OF_SUPPORTED_GPUS)
 913                        break;
 914        }
 915        mutex_unlock(&p->mutex);
 916
 917        return 0;
 918}
 919
 920static int kfd_ioctl_get_process_apertures_new(struct file *filp,
 921                                struct kfd_process *p, void *data)
 922{
 923        struct kfd_ioctl_get_process_apertures_new_args *args = data;
 924        struct kfd_process_device_apertures *pa;
 925        int ret;
 926        int i;
 927
 928        dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
 929
 930        if (args->num_of_nodes == 0) {
  931                /* Return number of nodes, so that user space can allocate
 932                 * sufficient memory
 933                 */
 934                mutex_lock(&p->mutex);
 935                args->num_of_nodes = p->n_pdds;
 936                goto out_unlock;
 937        }
 938
 939        /* Fill in process-aperture information for all available
 940         * nodes, but not more than args->num_of_nodes as that is
 941         * the amount of memory allocated by user
 942         */
 943        pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
 944                                args->num_of_nodes), GFP_KERNEL);
 945        if (!pa)
 946                return -ENOMEM;
 947
 948        mutex_lock(&p->mutex);
 949
 950        if (!p->n_pdds) {
 951                args->num_of_nodes = 0;
 952                kfree(pa);
 953                goto out_unlock;
 954        }
 955
 956        /* Run over all pdd of the process */
 957        for (i = 0; i < min(p->n_pdds, args->num_of_nodes); i++) {
 958                struct kfd_process_device *pdd = p->pdds[i];
 959
 960                pa[i].gpu_id = pdd->dev->id;
 961                pa[i].lds_base = pdd->lds_base;
 962                pa[i].lds_limit = pdd->lds_limit;
 963                pa[i].gpuvm_base = pdd->gpuvm_base;
 964                pa[i].gpuvm_limit = pdd->gpuvm_limit;
 965                pa[i].scratch_base = pdd->scratch_base;
 966                pa[i].scratch_limit = pdd->scratch_limit;
 967
 968                dev_dbg(kfd_device,
 969                        "gpu id %u\n", pdd->dev->id);
 970                dev_dbg(kfd_device,
 971                        "lds_base %llX\n", pdd->lds_base);
 972                dev_dbg(kfd_device,
 973                        "lds_limit %llX\n", pdd->lds_limit);
 974                dev_dbg(kfd_device,
 975                        "gpuvm_base %llX\n", pdd->gpuvm_base);
 976                dev_dbg(kfd_device,
 977                        "gpuvm_limit %llX\n", pdd->gpuvm_limit);
 978                dev_dbg(kfd_device,
 979                        "scratch_base %llX\n", pdd->scratch_base);
 980                dev_dbg(kfd_device,
 981                        "scratch_limit %llX\n", pdd->scratch_limit);
 982        }
 983        mutex_unlock(&p->mutex);
 984
 985        args->num_of_nodes = i;
 986        ret = copy_to_user(
 987                        (void __user *)args->kfd_process_device_apertures_ptr,
 988                        pa,
 989                        (i * sizeof(struct kfd_process_device_apertures)));
 990        kfree(pa);
 991        return ret ? -EFAULT : 0;
 992
 993out_unlock:
 994        mutex_unlock(&p->mutex);
 995        return 0;
 996}
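
/*
 * Illustrative sketch (not part of this file): this ioctl is designed for a
 * two-call pattern.  A first call with num_of_nodes == 0 only reports how
 * many nodes exist; userspace then allocates that many
 * kfd_process_device_apertures entries and calls again.  Assuming the uapi
 * names, that is roughly:
 *
 *	struct kfd_ioctl_get_process_apertures_new_args a = {0};
 *
 *	ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &a);
 *	apertures = calloc(a.num_of_nodes, sizeof(*apertures));
 *	a.kfd_process_device_apertures_ptr = (uintptr_t)apertures;
 *	ioctl(kfd_fd, AMDKFD_IOC_GET_PROCESS_APERTURES_NEW, &a);
 */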
 997
 998static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
 999                                        void *data)
1000{
1001        struct kfd_ioctl_create_event_args *args = data;
1002        int err;
1003
1004        /* For dGPUs the event page is allocated in user mode. The
1005         * handle is passed to KFD with the first call to this IOCTL
1006         * through the event_page_offset field.
1007         */
1008        if (args->event_page_offset) {
1009                struct kfd_dev *kfd;
1010                struct kfd_process_device *pdd;
1011                void *mem, *kern_addr;
1012                uint64_t size;
1013
1014                if (p->signal_page) {
1015                        pr_err("Event page is already set\n");
1016                        return -EINVAL;
1017                }
1018
1019                kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
1020                if (!kfd) {
1021                        pr_err("Getting device by id failed in %s\n", __func__);
1022                        return -EINVAL;
1023                }
1024
1025                mutex_lock(&p->mutex);
1026                pdd = kfd_bind_process_to_device(kfd, p);
1027                if (IS_ERR(pdd)) {
1028                        err = PTR_ERR(pdd);
1029                        goto out_unlock;
1030                }
1031
1032                mem = kfd_process_device_translate_handle(pdd,
1033                                GET_IDR_HANDLE(args->event_page_offset));
1034                if (!mem) {
1035                        pr_err("Can't find BO, offset is 0x%llx\n",
1036                               args->event_page_offset);
1037                        err = -EINVAL;
1038                        goto out_unlock;
1039                }
1040                mutex_unlock(&p->mutex);
1041
1042                err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
1043                                                mem, &kern_addr, &size);
1044                if (err) {
1045                        pr_err("Failed to map event page to kernel\n");
1046                        return err;
1047                }
1048
1049                err = kfd_event_page_set(p, kern_addr, size);
1050                if (err) {
1051                        pr_err("Failed to set event page\n");
1052                        return err;
1053                }
1054        }
1055
1056        err = kfd_event_create(filp, p, args->event_type,
1057                                args->auto_reset != 0, args->node_id,
1058                                &args->event_id, &args->event_trigger_data,
1059                                &args->event_page_offset,
1060                                &args->event_slot_index);
1061
1062        return err;
1063
1064out_unlock:
1065        mutex_unlock(&p->mutex);
1066        return err;
1067}
1068
1069static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
1070                                        void *data)
1071{
1072        struct kfd_ioctl_destroy_event_args *args = data;
1073
1074        return kfd_event_destroy(p, args->event_id);
1075}
1076
1077static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
1078                                void *data)
1079{
1080        struct kfd_ioctl_set_event_args *args = data;
1081
1082        return kfd_set_event(p, args->event_id);
1083}
1084
1085static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
1086                                void *data)
1087{
1088        struct kfd_ioctl_reset_event_args *args = data;
1089
1090        return kfd_reset_event(p, args->event_id);
1091}
1092
1093static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
1094                                void *data)
1095{
1096        struct kfd_ioctl_wait_events_args *args = data;
1097        int err;
1098
1099        err = kfd_wait_on_events(p, args->num_events,
1100                        (void __user *)args->events_ptr,
1101                        (args->wait_for_all != 0),
1102                        args->timeout, &args->wait_result);
1103
1104        return err;
1105}
1106static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
1107                                        struct kfd_process *p, void *data)
1108{
1109        struct kfd_ioctl_set_scratch_backing_va_args *args = data;
1110        struct kfd_process_device *pdd;
1111        struct kfd_dev *dev;
1112        long err;
1113
1114        dev = kfd_device_by_id(args->gpu_id);
1115        if (!dev)
1116                return -EINVAL;
1117
1118        mutex_lock(&p->mutex);
1119
1120        pdd = kfd_bind_process_to_device(dev, p);
1121        if (IS_ERR(pdd)) {
1122                err = PTR_ERR(pdd);
1123                goto bind_process_to_device_fail;
1124        }
1125
1126        pdd->qpd.sh_hidden_private_base = args->va_addr;
1127
1128        mutex_unlock(&p->mutex);
1129
1130        if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
1131            pdd->qpd.vmid != 0 && dev->kfd2kgd->set_scratch_backing_va)
1132                dev->kfd2kgd->set_scratch_backing_va(
1133                        dev->kgd, args->va_addr, pdd->qpd.vmid);
1134
1135        return 0;
1136
1137bind_process_to_device_fail:
1138        mutex_unlock(&p->mutex);
1139        return err;
1140}
1141
1142static int kfd_ioctl_get_tile_config(struct file *filep,
1143                struct kfd_process *p, void *data)
1144{
1145        struct kfd_ioctl_get_tile_config_args *args = data;
1146        struct kfd_dev *dev;
1147        struct tile_config config;
1148        int err = 0;
1149
1150        dev = kfd_device_by_id(args->gpu_id);
1151        if (!dev)
1152                return -EINVAL;
1153
1154        amdgpu_amdkfd_get_tile_config(dev->kgd, &config);
1155
1156        args->gb_addr_config = config.gb_addr_config;
1157        args->num_banks = config.num_banks;
1158        args->num_ranks = config.num_ranks;
1159
1160        if (args->num_tile_configs > config.num_tile_configs)
1161                args->num_tile_configs = config.num_tile_configs;
1162        err = copy_to_user((void __user *)args->tile_config_ptr,
1163                        config.tile_config_ptr,
1164                        args->num_tile_configs * sizeof(uint32_t));
1165        if (err) {
1166                args->num_tile_configs = 0;
1167                return -EFAULT;
1168        }
1169
1170        if (args->num_macro_tile_configs > config.num_macro_tile_configs)
1171                args->num_macro_tile_configs =
1172                                config.num_macro_tile_configs;
1173        err = copy_to_user((void __user *)args->macro_tile_config_ptr,
1174                        config.macro_tile_config_ptr,
1175                        args->num_macro_tile_configs * sizeof(uint32_t));
1176        if (err) {
1177                args->num_macro_tile_configs = 0;
1178                return -EFAULT;
1179        }
1180
1181        return 0;
1182}
1183
1184static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
1185                                void *data)
1186{
1187        struct kfd_ioctl_acquire_vm_args *args = data;
1188        struct kfd_process_device *pdd;
1189        struct kfd_dev *dev;
1190        struct file *drm_file;
1191        int ret;
1192
1193        dev = kfd_device_by_id(args->gpu_id);
1194        if (!dev)
1195                return -EINVAL;
1196
1197        drm_file = fget(args->drm_fd);
1198        if (!drm_file)
1199                return -EINVAL;
1200
1201        mutex_lock(&p->mutex);
1202
1203        pdd = kfd_get_process_device_data(dev, p);
1204        if (!pdd) {
1205                ret = -EINVAL;
1206                goto err_unlock;
1207        }
1208
1209        if (pdd->drm_file) {
1210                ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
1211                goto err_unlock;
1212        }
1213
1214        ret = kfd_process_device_init_vm(pdd, drm_file);
1215        if (ret)
1216                goto err_unlock;
1217        /* On success, the PDD keeps the drm_file reference */
1218        mutex_unlock(&p->mutex);
1219
1220        return 0;
1221
1222err_unlock:
1223        mutex_unlock(&p->mutex);
1224        fput(drm_file);
1225        return ret;
1226}
1227
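/*
 * A device is treated as "large BAR" when all of its local memory is
 * host-accessible: the reported private VRAM size is zero while some
 * public (CPU-visible) VRAM exists.  Only in that case does
 * kfd_ioctl_alloc_memory_of_gpu() below allow host-visible VRAM
 * allocations.
 */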
1228bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1229{
1230        struct kfd_local_mem_info mem_info;
1231
1232        if (debug_largebar) {
1233                pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1234                return true;
1235        }
1236
1237        if (dev->use_iommu_v2)
1238                return false;
1239
1240        amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
1241        if (mem_info.local_mem_size_private == 0 &&
1242                        mem_info.local_mem_size_public > 0)
1243                return true;
1244        return false;
1245}
1246
1247static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1248                                        struct kfd_process *p, void *data)
1249{
1250        struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1251        struct kfd_process_device *pdd;
1252        void *mem;
1253        struct kfd_dev *dev;
1254        int idr_handle;
1255        long err;
1256        uint64_t offset = args->mmap_offset;
1257        uint32_t flags = args->flags;
1258
1259        if (args->size == 0)
1260                return -EINVAL;
1261
1262        dev = kfd_device_by_id(args->gpu_id);
1263        if (!dev)
1264                return -EINVAL;
1265
1266        if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1267                (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1268                !kfd_dev_is_large_bar(dev)) {
1269                pr_err("Alloc host visible vram on small bar is not allowed\n");
1270                return -EINVAL;
1271        }
1272
1273        mutex_lock(&p->mutex);
1274
1275        pdd = kfd_bind_process_to_device(dev, p);
1276        if (IS_ERR(pdd)) {
1277                err = PTR_ERR(pdd);
1278                goto err_unlock;
1279        }
1280
1281        if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
1282                if (args->size != kfd_doorbell_process_slice(dev)) {
1283                        err = -EINVAL;
1284                        goto err_unlock;
1285                }
1286                offset = kfd_get_process_doorbells(pdd);
1287        } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
1288                if (args->size != PAGE_SIZE) {
1289                        err = -EINVAL;
1290                        goto err_unlock;
1291                }
1292                offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
1293                if (!offset) {
1294                        err = -ENOMEM;
1295                        goto err_unlock;
1296                }
1297        }
1298
1299        err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1300                dev->kgd, args->va_addr, args->size,
1301                pdd->drm_priv, (struct kgd_mem **) &mem, &offset,
1302                flags);
1303
1304        if (err)
1305                goto err_unlock;
1306
1307        idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1308        if (idr_handle < 0) {
1309                err = -EFAULT;
1310                goto err_free;
1311        }
1312
1313        /* Update the VRAM usage count */
1314        if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
1315                WRITE_ONCE(pdd->vram_usage, pdd->vram_usage + args->size);
1316
1317        mutex_unlock(&p->mutex);
1318
1319        args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1320        args->mmap_offset = offset;
1321
1322        /* MMIO is mapped through kfd device
1323         * Generate a kfd mmap offset
1324         */
1325        if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)
1326                args->mmap_offset = KFD_MMAP_TYPE_MMIO
1327                                        | KFD_MMAP_GPU_ID(args->gpu_id);
1328
1329        return 0;
1330
1331err_free:
1332        amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
1333                                               pdd->drm_priv, NULL);
1334err_unlock:
1335        mutex_unlock(&p->mutex);
1336        return err;
1337}
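
/*
 * Illustrative note (not part of this file): the handle returned above
 * packs the gpu_id together with an IDR index via MAKE_HANDLE(), and it is
 * what userspace passes back to the map, unmap and free ioctls below.  The
 * typical life cycle is therefore: allocate here, map to one or more GPUs,
 * unmap, and finally free, always using the same handle value.
 */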
1338
1339static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1340                                        struct kfd_process *p, void *data)
1341{
1342        struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1343        struct kfd_process_device *pdd;
1344        void *mem;
1345        struct kfd_dev *dev;
1346        int ret;
1347        uint64_t size = 0;
1348
1349        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1350        if (!dev)
1351                return -EINVAL;
1352
1353        mutex_lock(&p->mutex);
1354
1355        pdd = kfd_get_process_device_data(dev, p);
1356        if (!pdd) {
1357                pr_err("Process device data doesn't exist\n");
1358                ret = -EINVAL;
1359                goto err_unlock;
1360        }
1361
1362        mem = kfd_process_device_translate_handle(
1363                pdd, GET_IDR_HANDLE(args->handle));
1364        if (!mem) {
1365                ret = -EINVAL;
1366                goto err_unlock;
1367        }
1368
1369        ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
1370                                (struct kgd_mem *)mem, pdd->drm_priv, &size);
1371
1372        /* If freeing the buffer failed, leave the handle in place for
1373         * clean-up during process tear-down.
1374         */
1375        if (!ret)
1376                kfd_process_device_remove_obj_handle(
1377                        pdd, GET_IDR_HANDLE(args->handle));
1378
1379        WRITE_ONCE(pdd->vram_usage, pdd->vram_usage - size);
1380
1381err_unlock:
1382        mutex_unlock(&p->mutex);
1383        return ret;
1384}
1385
1386static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1387                                        struct kfd_process *p, void *data)
1388{
1389        struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1390        struct kfd_process_device *pdd, *peer_pdd;
1391        void *mem;
1392        struct kfd_dev *dev, *peer;
1393        long err = 0;
1394        int i;
1395        uint32_t *devices_arr = NULL;
1396        bool table_freed = false;
1397
1398        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1399        if (!dev)
1400                return -EINVAL;
1401
1402        if (!args->n_devices) {
1403                pr_debug("Device IDs array empty\n");
1404                return -EINVAL;
1405        }
1406        if (args->n_success > args->n_devices) {
1407                pr_debug("n_success exceeds n_devices\n");
1408                return -EINVAL;
1409        }
1410
1411        devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1412                                    GFP_KERNEL);
1413        if (!devices_arr)
1414                return -ENOMEM;
1415
1416        err = copy_from_user(devices_arr,
1417                             (void __user *)args->device_ids_array_ptr,
1418                             args->n_devices * sizeof(*devices_arr));
1419        if (err != 0) {
1420                err = -EFAULT;
1421                goto copy_from_user_failed;
1422        }
1423
1424        mutex_lock(&p->mutex);
1425
1426        pdd = kfd_bind_process_to_device(dev, p);
1427        if (IS_ERR(pdd)) {
1428                err = PTR_ERR(pdd);
1429                goto bind_process_to_device_failed;
1430        }
1431
1432        mem = kfd_process_device_translate_handle(pdd,
1433                                                GET_IDR_HANDLE(args->handle));
1434        if (!mem) {
1435                err = -ENOMEM;
1436                goto get_mem_obj_from_handle_failed;
1437        }
1438
1439        for (i = args->n_success; i < args->n_devices; i++) {
1440                peer = kfd_device_by_id(devices_arr[i]);
1441                if (!peer) {
1442                        pr_debug("Getting device by id failed for 0x%x\n",
1443                                 devices_arr[i]);
1444                        err = -EINVAL;
1445                        goto get_mem_obj_from_handle_failed;
1446                }
1447
1448                peer_pdd = kfd_bind_process_to_device(peer, p);
1449                if (IS_ERR(peer_pdd)) {
1450                        err = PTR_ERR(peer_pdd);
1451                        goto get_mem_obj_from_handle_failed;
1452                }
1453                err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1454                        peer->kgd, (struct kgd_mem *)mem,
1455                        peer_pdd->drm_priv, &table_freed);
1456                if (err) {
1457                        pr_err("Failed to map to gpu %d/%d\n",
1458                               i, args->n_devices);
1459                        goto map_memory_to_gpu_failed;
1460                }
1461                args->n_success = i+1;
1462        }
1463
1464        mutex_unlock(&p->mutex);
1465
1466        err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
1467        if (err) {
1468                pr_debug("Sync memory failed, wait interrupted by user signal\n");
1469                goto sync_memory_failed;
1470        }
1471
1472        /* Flush TLBs after waiting for the page table updates to complete */
1473        if (table_freed) {
1474                for (i = 0; i < args->n_devices; i++) {
1475                        peer = kfd_device_by_id(devices_arr[i]);
1476                        if (WARN_ON_ONCE(!peer))
1477                                continue;
1478                        peer_pdd = kfd_get_process_device_data(peer, p);
1479                        if (WARN_ON_ONCE(!peer_pdd))
1480                                continue;
1481                        kfd_flush_tlb(peer_pdd, TLB_FLUSH_LEGACY);
1482                }
1483        }
1484        kfree(devices_arr);
1485
1486        return err;
1487
1488bind_process_to_device_failed:
1489get_mem_obj_from_handle_failed:
1490map_memory_to_gpu_failed:
1491        mutex_unlock(&p->mutex);
1492copy_from_user_failed:
1493sync_memory_failed:
1494        kfree(devices_arr);
1495
1496        return err;
1497}
1498
1499static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1500                                        struct kfd_process *p, void *data)
1501{
1502        struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1503        struct kfd_process_device *pdd, *peer_pdd;
1504        void *mem;
1505        struct kfd_dev *dev, *peer;
1506        long err = 0;
1507        uint32_t *devices_arr = NULL, i;
1508
1509        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1510        if (!dev)
1511                return -EINVAL;
1512
1513        if (!args->n_devices) {
1514                pr_debug("Device IDs array empty\n");
1515                return -EINVAL;
1516        }
1517        if (args->n_success > args->n_devices) {
1518                pr_debug("n_success exceeds n_devices\n");
1519                return -EINVAL;
1520        }
1521
1522        devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1523                                    GFP_KERNEL);
1524        if (!devices_arr)
1525                return -ENOMEM;
1526
1527        err = copy_from_user(devices_arr,
1528                             (void __user *)args->device_ids_array_ptr,
1529                             args->n_devices * sizeof(*devices_arr));
1530        if (err != 0) {
1531                err = -EFAULT;
1532                goto copy_from_user_failed;
1533        }
1534
1535        mutex_lock(&p->mutex);
1536
1537        pdd = kfd_get_process_device_data(dev, p);
1538        if (!pdd) {
1539                err = -EINVAL;
1540                goto get_process_device_data_failed;
1541        }
1542
1543        mem = kfd_process_device_translate_handle(pdd,
1544                                                GET_IDR_HANDLE(args->handle));
1545        if (!mem) {
1546                err = -ENOMEM;
1547                goto get_mem_obj_from_handle_failed;
1548        }
1549
1550        for (i = args->n_success; i < args->n_devices; i++) {
1551                peer = kfd_device_by_id(devices_arr[i]);
1552                if (!peer) {
1553                        err = -EINVAL;
1554                        goto get_mem_obj_from_handle_failed;
1555                }
1556
1557                peer_pdd = kfd_get_process_device_data(peer, p);
1558                if (!peer_pdd) {
1559                        err = -ENODEV;
1560                        goto get_mem_obj_from_handle_failed;
1561                }
1562                err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1563                        peer->kgd, (struct kgd_mem *)mem, peer_pdd->drm_priv);
1564                if (err) {
1565                        pr_err("Failed to unmap from gpu %d/%d\n",
1566                               i, args->n_devices);
1567                        goto unmap_memory_from_gpu_failed;
1568                }
1569                args->n_success = i + 1;
1570        }
1571        mutex_unlock(&p->mutex);
1572
1573        if (dev->device_info->asic_family == CHIP_ALDEBARAN) {
1574                err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd,
1575                                (struct kgd_mem *) mem, true);
1576                if (err) {
1577                        pr_debug("Sync memory failed, wait interrupted by user signal\n");
1578                        goto sync_memory_failed;
1579                }
1580
1581                /* Flush TLBs after waiting for the page table updates to complete */
1582                for (i = 0; i < args->n_devices; i++) {
1583                        peer = kfd_device_by_id(devices_arr[i]);
1584                        if (WARN_ON_ONCE(!peer))
1585                                continue;
1586                        peer_pdd = kfd_get_process_device_data(peer, p);
1587                        if (WARN_ON_ONCE(!peer_pdd))
1588                                continue;
1589                        kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
1590                }
1591        }
1592        kfree(devices_arr);
1593
1594        return 0;
1595
1596get_process_device_data_failed:
1597get_mem_obj_from_handle_failed:
1598unmap_memory_from_gpu_failed:
1599        mutex_unlock(&p->mutex);
1600copy_from_user_failed:
1601sync_memory_failed:
1602        kfree(devices_arr);
1603        return err;
1604}
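/*
 * A minimal userspace sketch of how the two ioctls above are driven
 * (illustrative only, not part of this driver; kfd_fd, buf_handle and
 * gpu_ids[] are placeholders).  buf_handle comes back from
 * AMDKFD_IOC_ALLOC_MEMORY_OF_GPU or AMDKFD_IOC_IMPORT_DMABUF, and
 * gpu_ids[] holds KFD topology GPU IDs.  If a call fails partway through
 * a multi-GPU mapping, n_success reports how many devices completed, so
 * a retry can resume there instead of starting over:
 *
 *	#include <stdint.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/kfd_ioctl.h>
 *
 *	struct kfd_ioctl_map_memory_to_gpu_args args = {
 *		.handle = buf_handle,
 *		.device_ids_array_ptr = (uintptr_t)gpu_ids,
 *		.n_devices = n_gpus,
 *		.n_success = 0,
 *	};
 *	int r = ioctl(kfd_fd, AMDKFD_IOC_MAP_MEMORY_TO_GPU, &args);
 *
 * AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU takes the same argument layout and
 * follows the same n_success convention.
 */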
1605
1606static int kfd_ioctl_alloc_queue_gws(struct file *filep,
1607                struct kfd_process *p, void *data)
1608{
1609        int retval;
1610        struct kfd_ioctl_alloc_queue_gws_args *args = data;
1611        struct queue *q;
1612        struct kfd_dev *dev;
1613
1614        mutex_lock(&p->mutex);
1615        q = pqm_get_user_queue(&p->pqm, args->queue_id);
1616
1617        if (!q) {
1618                retval = -EINVAL;
1619                goto out_unlock;
1620        }
1621
1622        dev = q->device;
1623
1624        if (!dev->gws) {
1625                retval = -ENODEV;
1626                goto out_unlock;
1627        }
1628
1629        if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
1630                retval = -ENODEV;
1631                goto out_unlock;
1632        }
1633
1634        retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
1635        mutex_unlock(&p->mutex);
1636
1637        args->first_gws = 0;
1638        return retval;
1639
1640out_unlock:
1641        mutex_unlock(&p->mutex);
1642        return retval;
1643}
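/*
 * Illustrative only (kfd_fd, queue_id and num_gws are placeholders):
 * userspace requests global wave sync (GWS) resources for an existing
 * queue, and releases them again by passing num_gws == 0.  The request
 * needs a device with GWS and HWS scheduling; first_gws is currently
 * always reported as 0:
 *
 *	struct kfd_ioctl_alloc_queue_gws_args gws = {
 *		.queue_id = queue_id,	// from AMDKFD_IOC_CREATE_QUEUE
 *		.num_gws = num_gws,	// 0 releases the allocation
 *	};
 *	int r = ioctl(kfd_fd, AMDKFD_IOC_ALLOC_QUEUE_GWS, &gws);
 *	// on success gws.first_gws holds the first assigned GWS entry
 */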
1644
1645static int kfd_ioctl_get_dmabuf_info(struct file *filep,
1646                struct kfd_process *p, void *data)
1647{
1648        struct kfd_ioctl_get_dmabuf_info_args *args = data;
1649        struct kfd_dev *dev = NULL;
1650        struct kgd_dev *dma_buf_kgd;
1651        void *metadata_buffer = NULL;
1652        uint32_t flags;
1653        unsigned int i;
1654        int r;
1655
1656        /* Use any KFD GPU device to service the dmabuf info query */
1657        for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
1658                if (dev)
1659                        break;
1660        if (!dev)
1661                return -EINVAL;
1662
1663        if (args->metadata_ptr) {
1664                metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
1665                if (!metadata_buffer)
1666                        return -ENOMEM;
1667        }
1668
1669        /* Get dmabuf info from KGD */
1670        r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
1671                                          &dma_buf_kgd, &args->size,
1672                                          metadata_buffer, args->metadata_size,
1673                                          &args->metadata_size, &flags);
1674        if (r)
1675                goto exit;
1676
1677        /* Reverse-lookup gpu_id from kgd pointer */
1678        dev = kfd_device_by_kgd(dma_buf_kgd);
1679        if (!dev) {
1680                r = -EINVAL;
1681                goto exit;
1682        }
1683        args->gpu_id = dev->id;
1684        args->flags = flags;
1685
1686        /* Copy metadata buffer to user mode */
1687        if (metadata_buffer) {
1688                r = copy_to_user((void __user *)args->metadata_ptr,
1689                                 metadata_buffer, args->metadata_size);
1690                if (r != 0)
1691                        r = -EFAULT;
1692        }
1693
1694exit:
1695        kfree(metadata_buffer);
1696
1697        return r;
1698}
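/*
 * Illustrative only (kfd_fd and dmabuf_fd are placeholders): a userspace
 * query for the size, owning GPU and allocation flags of an existing
 * dma-buf.  The metadata buffer is optional and is only copied back when
 * metadata_ptr is non-zero:
 *
 *	struct kfd_ioctl_get_dmabuf_info_args info = {
 *		.dmabuf_fd = dmabuf_fd,
 *	};
 *	if (ioctl(kfd_fd, AMDKFD_IOC_GET_DMABUF_INFO, &info) == 0)
 *		printf("size %llu on gpu 0x%x flags 0x%x\n",
 *		       (unsigned long long)info.size, info.gpu_id, info.flags);
 */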
1699
1700static int kfd_ioctl_import_dmabuf(struct file *filep,
1701                                   struct kfd_process *p, void *data)
1702{
1703        struct kfd_ioctl_import_dmabuf_args *args = data;
1704        struct kfd_process_device *pdd;
1705        struct dma_buf *dmabuf;
1706        struct kfd_dev *dev;
1707        int idr_handle;
1708        uint64_t size;
1709        void *mem;
1710        int r;
1711
1712        dev = kfd_device_by_id(args->gpu_id);
1713        if (!dev)
1714                return -EINVAL;
1715
1716        dmabuf = dma_buf_get(args->dmabuf_fd);
1717        if (IS_ERR(dmabuf))
1718                return PTR_ERR(dmabuf);
1719
1720        mutex_lock(&p->mutex);
1721
1722        pdd = kfd_bind_process_to_device(dev, p);
1723        if (IS_ERR(pdd)) {
1724                r = PTR_ERR(pdd);
1725                goto err_unlock;
1726        }
1727
1728        r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
1729                                              args->va_addr, pdd->drm_priv,
1730                                              (struct kgd_mem **)&mem, &size,
1731                                              NULL);
1732        if (r)
1733                goto err_unlock;
1734
1735        idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1736        if (idr_handle < 0) {
1737                r = -EFAULT;
1738                goto err_free;
1739        }
1740
1741        mutex_unlock(&p->mutex);
1742        dma_buf_put(dmabuf);
1743
1744        args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1745
1746        return 0;
1747
1748err_free:
1749        amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem,
1750                                               pdd->drm_priv, NULL);
1751err_unlock:
1752        mutex_unlock(&p->mutex);
1753        dma_buf_put(dmabuf);
1754        return r;
1755}
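/*
 * Illustrative only (kfd_fd, dmabuf_fd, gpu_id and va are placeholders):
 * importing a dma-buf fd (for example one exported from an amdgpu render
 * node) and making it usable through the KFD memory ioctls:
 *
 *	struct kfd_ioctl_import_dmabuf_args imp = {
 *		.dmabuf_fd = dmabuf_fd,
 *		.gpu_id = gpu_id,
 *		.va_addr = va,	// GPU virtual address chosen by the caller
 *	};
 *	int r = ioctl(kfd_fd, AMDKFD_IOC_IMPORT_DMABUF, &imp);
 *	// on success imp.handle identifies the buffer for
 *	// AMDKFD_IOC_MAP_MEMORY_TO_GPU and the other memory ioctls
 */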
1756
1757/* Handle requests for watching SMI events */
1758static int kfd_ioctl_smi_events(struct file *filep,
1759                                struct kfd_process *p, void *data)
1760{
1761        struct kfd_ioctl_smi_events_args *args = data;
1762        struct kfd_dev *dev;
1763
1764        dev = kfd_device_by_id(args->gpuid);
1765        if (!dev)
1766                return -EINVAL;
1767
1768        return kfd_smi_event_open(dev, &args->anon_fd);
1769}
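/*
 * Illustrative only (kfd_fd and gpu_id are placeholders): the ioctl
 * returns an anonymous file descriptor in anon_fd, and userspace then
 * uses ordinary file I/O on that descriptor to select and receive SMI
 * events (the event mask and record format are defined by the SMI event
 * code, not here):
 *
 *	struct kfd_ioctl_smi_events_args ev = {
 *		.gpuid = gpu_id,
 *	};
 *	if (ioctl(kfd_fd, AMDKFD_IOC_SMI_EVENTS, &ev) == 0) {
 *		// write the desired event mask to ev.anon_fd, then
 *		// poll()/read() it for event records
 *	}
 */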
1770
1771static int kfd_ioctl_set_xnack_mode(struct file *filep,
1772                                    struct kfd_process *p, void *data)
1773{
1774        struct kfd_ioctl_set_xnack_mode_args *args = data;
1775        int r = 0;
1776
1777        mutex_lock(&p->mutex);
1778        if (args->xnack_enabled >= 0) {
1779                if (!list_empty(&p->pqm.queues)) {
1780                        pr_debug("Process has user queues running\n");
1781                        mutex_unlock(&p->mutex);
1782                        return -EBUSY;
1783                }
1784                if (args->xnack_enabled && !kfd_process_xnack_mode(p, true))
1785                        r = -EPERM;
1786                else
1787                        p->xnack_enabled = args->xnack_enabled;
1788        } else {
1789                args->xnack_enabled = p->xnack_enabled;
1790        }
1791        mutex_unlock(&p->mutex);
1792
1793        return r;
1794}
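/*
 * Illustrative only (kfd_fd is a placeholder): XNACK controls whether GPU
 * memory accesses may fault and be retried, which SVM relies on for
 * demand paging.  The mode can only be changed while the process has no
 * user queues; a negative value queries the current setting instead of
 * changing it:
 *
 *	struct kfd_ioctl_set_xnack_mode_args xn = { .xnack_enabled = -1 };
 *
 *	ioctl(kfd_fd, AMDKFD_IOC_SET_XNACK_MODE, &xn);	// query current mode
 *	xn.xnack_enabled = 1;
 *	ioctl(kfd_fd, AMDKFD_IOC_SET_XNACK_MODE, &xn);	// fails with EPERM if
 *							// the GPUs cannot support it
 */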
1795
1796#if IS_ENABLED(CONFIG_HSA_AMD_SVM)
1797static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
1798{
1799        struct kfd_ioctl_svm_args *args = data;
1800        int r = 0;
1801
1802        pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
1803                 args->start_addr, args->size, args->op, args->nattr);
1804
1805        if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
1806                return -EINVAL;
1807        if (!args->start_addr || !args->size)
1808                return -EINVAL;
1809
1810        mutex_lock(&p->mutex);
1811
1812        r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
1813                      args->attrs);
1814
1815        mutex_unlock(&p->mutex);
1816
1817        return r;
1818}
1819#else
1820static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
1821{
1822        return -EPERM;
1823}
1824#endif
1825
1826#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
1827        [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
1828                            .cmd_drv = 0, .name = #ioctl}
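/*
 * For reference, a table entry below expands roughly to:
 *
 *	[_IOC_NR(AMDKFD_IOC_GET_VERSION)] = {
 *		.cmd     = AMDKFD_IOC_GET_VERSION,
 *		.func    = kfd_ioctl_get_version,
 *		.flags   = 0,
 *		.cmd_drv = 0,
 *		.name    = "AMDKFD_IOC_GET_VERSION",
 *	},
 *
 * which lets kfd_ioctl() index the table directly by the command's
 * _IOC_NR value.
 */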
1829
1830/* Ioctl table */
1831static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1832        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
1833                        kfd_ioctl_get_version, 0),
1834
1835        AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
1836                        kfd_ioctl_create_queue, 0),
1837
1838        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
1839                        kfd_ioctl_destroy_queue, 0),
1840
1841        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
1842                        kfd_ioctl_set_memory_policy, 0),
1843
1844        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
1845                        kfd_ioctl_get_clock_counters, 0),
1846
1847        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
1848                        kfd_ioctl_get_process_apertures, 0),
1849
1850        AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
1851                        kfd_ioctl_update_queue, 0),
1852
1853        AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
1854                        kfd_ioctl_create_event, 0),
1855
1856        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
1857                        kfd_ioctl_destroy_event, 0),
1858
1859        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
1860                        kfd_ioctl_set_event, 0),
1861
1862        AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
1863                        kfd_ioctl_reset_event, 0),
1864
1865        AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
1866                        kfd_ioctl_wait_events, 0),
1867
1868        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
1869                        kfd_ioctl_dbg_register, 0),
1870
1871        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
1872                        kfd_ioctl_dbg_unregister, 0),
1873
1874        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
1875                        kfd_ioctl_dbg_address_watch, 0),
1876
1877        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
1878                        kfd_ioctl_dbg_wave_control, 0),
1879
1880        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
1881                        kfd_ioctl_set_scratch_backing_va, 0),
1882
1883        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
1884                        kfd_ioctl_get_tile_config, 0),
1885
1886        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
1887                        kfd_ioctl_set_trap_handler, 0),
1888
1889        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
1890                        kfd_ioctl_get_process_apertures_new, 0),
1891
1892        AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
1893                        kfd_ioctl_acquire_vm, 0),
1894
1895        AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
1896                        kfd_ioctl_alloc_memory_of_gpu, 0),
1897
1898        AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
1899                        kfd_ioctl_free_memory_of_gpu, 0),
1900
1901        AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
1902                        kfd_ioctl_map_memory_to_gpu, 0),
1903
1904        AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1905                        kfd_ioctl_unmap_memory_from_gpu, 0),
1906
1907        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
1908                        kfd_ioctl_set_cu_mask, 0),
1909
1910        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
1911                        kfd_ioctl_get_queue_wave_state, 0),
1912
1913        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
1914                                kfd_ioctl_get_dmabuf_info, 0),
1915
1916        AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
1917                                kfd_ioctl_import_dmabuf, 0),
1918
1919        AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
1920                        kfd_ioctl_alloc_queue_gws, 0),
1921
1922        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
1923                        kfd_ioctl_smi_events, 0),
1924
1925        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
1926
1927        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
1928                        kfd_ioctl_set_xnack_mode, 0),
1929};
1930
1931#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
1932
1933static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1934{
1935        struct kfd_process *process;
1936        amdkfd_ioctl_t *func;
1937        const struct amdkfd_ioctl_desc *ioctl = NULL;
1938        unsigned int nr = _IOC_NR(cmd);
1939        char stack_kdata[128];
1940        char *kdata = NULL;
1941        unsigned int usize, asize;
1942        int retcode = -EINVAL;
1943
1944        if (nr >= AMDKFD_CORE_IOCTL_COUNT)
1945                goto err_i1;
1946
1947        if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
1948                u32 amdkfd_size;
1949
1950                ioctl = &amdkfd_ioctls[nr];
1951
1952                amdkfd_size = _IOC_SIZE(ioctl->cmd);
1953                usize = asize = _IOC_SIZE(cmd);
1954                if (amdkfd_size > asize)
1955                        asize = amdkfd_size;
1956
1957                cmd = ioctl->cmd;
1958        } else
1959                goto err_i1;
1960
1961        dev_dbg(kfd_device, "ioctl cmd 0x%x (#0x%x), arg 0x%lx\n", cmd, nr, arg);
1962
1963        /* Get the process struct from the filep. Only the process
1964         * that opened /dev/kfd can use the file descriptor. Child
1965         * processes need to create their own KFD device context.
1966         */
1967        process = filep->private_data;
1968        if (process->lead_thread != current->group_leader) {
1969                dev_dbg(kfd_device, "Using KFD FD in wrong process\n");
1970                retcode = -EBADF;
1971                goto err_i1;
1972        }
1973
1974        /* Do not trust userspace, use our own definition */
1975        func = ioctl->func;
1976
1977        if (unlikely(!func)) {
1978                dev_dbg(kfd_device, "no function\n");
1979                retcode = -EINVAL;
1980                goto err_i1;
1981        }
1982
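        /*
         * Stage the argument block in kernel memory.  asize is the larger of
         * the size encoded in the user's command word and the size in the
         * driver's table, so a shorter struct from older userspace is
         * zero-extended, while a command that encodes a larger size (e.g.
         * variable-length argument data) is still copied in full below.
         */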
1983        if (cmd & (IOC_IN | IOC_OUT)) {
1984                if (asize <= sizeof(stack_kdata)) {
1985                        kdata = stack_kdata;
1986                } else {
1987                        kdata = kmalloc(asize, GFP_KERNEL);
1988                        if (!kdata) {
1989                                retcode = -ENOMEM;
1990                                goto err_i1;
1991                        }
1992                }
1993                if (asize > usize)
1994                        memset(kdata + usize, 0, asize - usize);
1995        }
1996
1997        if (cmd & IOC_IN) {
1998                if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
1999                        retcode = -EFAULT;
2000                        goto err_i1;
2001                }
2002        } else if (cmd & IOC_OUT) {
2003                memset(kdata, 0, usize);
2004        }
2005
2006        retcode = func(filep, process, kdata);
2007
2008        if (cmd & IOC_OUT)
2009                if (copy_to_user((void __user *)arg, kdata, usize) != 0)
2010                        retcode = -EFAULT;
2011
2012err_i1:
2013        if (!ioctl)
2014                dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
2015                          task_pid_nr(current), cmd, nr);
2016
2017        if (kdata != stack_kdata)
2018                kfree(kdata);
2019
2020        if (retcode)
2021                dev_dbg(kfd_device, "ioctl cmd (#0x%x), arg 0x%lx, ret = %d\n",
2022                                nr, arg, retcode);
2023
2024        return retcode;
2025}
2026
2027static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process,
2028                      struct vm_area_struct *vma)
2029{
2030        phys_addr_t address;
2031        int ret;
2032
2033        if (vma->vm_end - vma->vm_start != PAGE_SIZE)
2034                return -EINVAL;
2035
2036        address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
2037
2038        vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
2039                                VM_DONTDUMP | VM_PFNMAP;
2040
2041        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
2042
2043        pr_debug("pasid 0x%x mapping mmio page\n"
2044                 "     target user address == 0x%08llX\n"
2045                 "     physical address    == 0x%08llX\n"
2046                 "     vm_flags            == 0x%04lX\n"
2047                 "     size                == 0x%04lX\n",
2048                 process->pasid, (unsigned long long) vma->vm_start,
2049                 address, vma->vm_flags, PAGE_SIZE);
2050
2051        ret = io_remap_pfn_range(vma,
2052                                vma->vm_start,
2053                                address >> PAGE_SHIFT,
2054                                PAGE_SIZE,
2055                                vma->vm_page_prot);
2056        return ret;
2057}
2058
2059
2060static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
2061{
2062        struct kfd_process *process;
2063        struct kfd_dev *dev = NULL;
2064        unsigned long mmap_offset;
2065        unsigned int gpu_id;
2066
2067        process = kfd_get_process(current);
2068        if (IS_ERR(process))
2069                return PTR_ERR(process);
2070
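        /*
         * The mmap offset is a cookie rather than a file position: the upper
         * bits encode the mapping type (KFD_MMAP_TYPE_*) and, where relevant,
         * the target GPU ID, matching the offsets the driver hands back to
         * userspace through the queue, event and memory ioctls.
         */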
2071        mmap_offset = vma->vm_pgoff << PAGE_SHIFT;
2072        gpu_id = KFD_MMAP_GET_GPU_ID(mmap_offset);
2073        if (gpu_id)
2074                dev = kfd_device_by_id(gpu_id);
2075
2076        switch (mmap_offset & KFD_MMAP_TYPE_MASK) {
2077        case KFD_MMAP_TYPE_DOORBELL:
2078                if (!dev)
2079                        return -ENODEV;
2080                return kfd_doorbell_mmap(dev, process, vma);
2081
2082        case KFD_MMAP_TYPE_EVENTS:
2083                return kfd_event_mmap(process, vma);
2084
2085        case KFD_MMAP_TYPE_RESERVED_MEM:
2086                if (!dev)
2087                        return -ENODEV;
2088                return kfd_reserved_mem_mmap(dev, process, vma);
2089        case KFD_MMAP_TYPE_MMIO:
2090                if (!dev)
2091                        return -ENODEV;
2092                return kfd_mmio_mmap(dev, process, vma);
2093        }
2094
2095        return -EFAULT;
2096}
2097