linux/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
   1/*
   2 * Copyright 2014 Advanced Micro Devices, Inc.
   3 *
   4 * Permission is hereby granted, free of charge, to any person obtaining a
   5 * copy of this software and associated documentation files (the "Software"),
   6 * to deal in the Software without restriction, including without limitation
   7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8 * and/or sell copies of the Software, and to permit persons to whom the
   9 * Software is furnished to do so, subject to the following conditions:
  10 *
  11 * The above copyright notice and this permission notice shall be included in
  12 * all copies or substantial portions of the Software.
  13 *
  14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20 * OTHER DEALINGS IN THE SOFTWARE.
  21 */
  22
  23#include <linux/device.h>
  24#include <linux/export.h>
  25#include <linux/err.h>
  26#include <linux/fs.h>
  27#include <linux/file.h>
  28#include <linux/sched.h>
  29#include <linux/slab.h>
  30#include <linux/uaccess.h>
  31#include <linux/compat.h>
  32#include <uapi/linux/kfd_ioctl.h>
  33#include <linux/time.h>
  34#include <linux/mm.h>
  35#include <linux/mman.h>
  36#include <linux/dma-buf.h>
  37#include <asm/processor.h>
  38#include "kfd_priv.h"
  39#include "kfd_device_queue_manager.h"
  40#include "kfd_dbgmgr.h"
  41#include "amdgpu_amdkfd.h"
  42
  43static long kfd_ioctl(struct file *, unsigned int, unsigned long);
  44static int kfd_open(struct inode *, struct file *);
  45static int kfd_mmap(struct file *, struct vm_area_struct *);
  46
  47static const char kfd_dev_name[] = "kfd";
  48
  49static const struct file_operations kfd_fops = {
  50        .owner = THIS_MODULE,
  51        .unlocked_ioctl = kfd_ioctl,
  52        .compat_ioctl = kfd_ioctl,
  53        .open = kfd_open,
  54        .mmap = kfd_mmap,
  55};
  56
  57static int kfd_char_dev_major = -1;
  58static struct class *kfd_class;
  59struct device *kfd_device;
  60
  61int kfd_chardev_init(void)
  62{
  63        int err = 0;
  64
  65        kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
  66        err = kfd_char_dev_major;
  67        if (err < 0)
  68                goto err_register_chrdev;
  69
  70        kfd_class = class_create(THIS_MODULE, kfd_dev_name);
  71        err = PTR_ERR(kfd_class);
  72        if (IS_ERR(kfd_class))
  73                goto err_class_create;
  74
  75        kfd_device = device_create(kfd_class, NULL,
  76                                        MKDEV(kfd_char_dev_major, 0),
  77                                        NULL, kfd_dev_name);
  78        err = PTR_ERR(kfd_device);
  79        if (IS_ERR(kfd_device))
  80                goto err_device_create;
  81
  82        return 0;
  83
  84err_device_create:
  85        class_destroy(kfd_class);
  86err_class_create:
  87        unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
  88err_register_chrdev:
  89        return err;
  90}
  91
  92void kfd_chardev_exit(void)
  93{
  94        device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
  95        class_destroy(kfd_class);
  96        unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
  97}
  98
  99struct device *kfd_chardev(void)
 100{
 101        return kfd_device;
 102}
 103
 104
 105static int kfd_open(struct inode *inode, struct file *filep)
 106{
 107        struct kfd_process *process;
 108        bool is_32bit_user_mode;
 109
 110        if (iminor(inode) != 0)
 111                return -ENODEV;
 112
 113        is_32bit_user_mode = in_compat_syscall();
 114
 115        if (is_32bit_user_mode) {
 116                dev_warn(kfd_device,
 117                        "Process %d (32-bit) failed to open /dev/kfd\n"
 118                        "32-bit processes are not supported by amdkfd\n",
 119                        current->pid);
 120                return -EPERM;
 121        }
 122
 123        process = kfd_create_process(filep);
 124        if (IS_ERR(process))
 125                return PTR_ERR(process);
 126
 127        if (kfd_is_locked())
 128                return -EAGAIN;
 129
 130        dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
 131                process->pasid, process->is_32bit_user_mode);
 132
 133        return 0;
 134}
 135
 136static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p,
 137                                        void *data)
 138{
 139        struct kfd_ioctl_get_version_args *args = data;
 140
 141        args->major_version = KFD_IOCTL_MAJOR_VERSION;
 142        args->minor_version = KFD_IOCTL_MINOR_VERSION;
 143
 144        return 0;
 145}
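/*
 * Example (not part of this driver): a minimal user-space sketch of reaching
 * the ioctl above, assuming the uapi header is installed as
 * <linux/kfd_ioctl.h> and the device node is /dev/kfd (as used by kfd_open()
 * above). Any names outside this file are assumptions taken from that header.
 *
 *      #include <fcntl.h>
 *      #include <stdio.h>
 *      #include <unistd.h>
 *      #include <sys/ioctl.h>
 *      #include <linux/kfd_ioctl.h>
 *
 *      static int query_kfd_version(void)
 *      {
 *              struct kfd_ioctl_get_version_args args = {0};
 *              int fd = open("/dev/kfd", O_RDWR);
 *
 *              if (fd < 0)
 *                      return -1;
 *              if (ioctl(fd, AMDKFD_IOC_GET_VERSION, &args) == 0)
 *                      printf("KFD ioctl interface %u.%u\n",
 *                             args.major_version, args.minor_version);
 *              close(fd);
 *              return 0;
 *      }
 */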
 146
 147static int set_queue_properties_from_user(struct queue_properties *q_properties,
 148                                struct kfd_ioctl_create_queue_args *args)
 149{
 150        if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
  151                pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
 152                return -EINVAL;
 153        }
 154
 155        if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
  156                pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
 157                return -EINVAL;
 158        }
 159
 160        if ((args->ring_base_address) &&
 161                (!access_ok((const void __user *) args->ring_base_address,
 162                        sizeof(uint64_t)))) {
 163                pr_err("Can't access ring base address\n");
 164                return -EFAULT;
 165        }
 166
 167        if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
 168                pr_err("Ring size must be a power of 2 or 0\n");
 169                return -EINVAL;
 170        }
 171
 172        if (!access_ok((const void __user *) args->read_pointer_address,
 173                        sizeof(uint32_t))) {
 174                pr_err("Can't access read pointer\n");
 175                return -EFAULT;
 176        }
 177
 178        if (!access_ok((const void __user *) args->write_pointer_address,
 179                        sizeof(uint32_t))) {
 180                pr_err("Can't access write pointer\n");
 181                return -EFAULT;
 182        }
 183
 184        if (args->eop_buffer_address &&
 185                !access_ok((const void __user *) args->eop_buffer_address,
 186                        sizeof(uint32_t))) {
 187                pr_debug("Can't access eop buffer");
 188                return -EFAULT;
 189        }
 190
 191        if (args->ctx_save_restore_address &&
 192                !access_ok((const void __user *) args->ctx_save_restore_address,
 193                        sizeof(uint32_t))) {
 194                pr_debug("Can't access ctx save restore buffer");
 195                return -EFAULT;
 196        }
 197
 198        q_properties->is_interop = false;
 199        q_properties->queue_percent = args->queue_percentage;
 200        q_properties->priority = args->queue_priority;
 201        q_properties->queue_address = args->ring_base_address;
 202        q_properties->queue_size = args->ring_size;
 203        q_properties->read_ptr = (uint32_t *) args->read_pointer_address;
 204        q_properties->write_ptr = (uint32_t *) args->write_pointer_address;
 205        q_properties->eop_ring_buffer_address = args->eop_buffer_address;
 206        q_properties->eop_ring_buffer_size = args->eop_buffer_size;
 207        q_properties->ctx_save_restore_area_address =
 208                        args->ctx_save_restore_address;
 209        q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size;
 210        q_properties->ctl_stack_size = args->ctl_stack_size;
 211        if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE ||
 212                args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
 213                q_properties->type = KFD_QUEUE_TYPE_COMPUTE;
 214        else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA)
 215                q_properties->type = KFD_QUEUE_TYPE_SDMA;
 216        else
 217                return -ENOTSUPP;
 218
 219        if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL)
 220                q_properties->format = KFD_QUEUE_FORMAT_AQL;
 221        else
 222                q_properties->format = KFD_QUEUE_FORMAT_PM4;
 223
 224        pr_debug("Queue Percentage: %d, %d\n",
 225                        q_properties->queue_percent, args->queue_percentage);
 226
 227        pr_debug("Queue Priority: %d, %d\n",
 228                        q_properties->priority, args->queue_priority);
 229
 230        pr_debug("Queue Address: 0x%llX, 0x%llX\n",
 231                        q_properties->queue_address, args->ring_base_address);
 232
 233        pr_debug("Queue Size: 0x%llX, %u\n",
 234                        q_properties->queue_size, args->ring_size);
 235
 236        pr_debug("Queue r/w Pointers: %px, %px\n",
 237                        q_properties->read_ptr,
 238                        q_properties->write_ptr);
 239
 240        pr_debug("Queue Format: %d\n", q_properties->format);
 241
 242        pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);
 243
 244        pr_debug("Queue CTX save area: 0x%llX\n",
 245                        q_properties->ctx_save_restore_area_address);
 246
 247        return 0;
 248}
 249
 250static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
 251                                        void *data)
 252{
 253        struct kfd_ioctl_create_queue_args *args = data;
 254        struct kfd_dev *dev;
 255        int err = 0;
 256        unsigned int queue_id;
 257        struct kfd_process_device *pdd;
 258        struct queue_properties q_properties;
 259
 260        memset(&q_properties, 0, sizeof(struct queue_properties));
 261
 262        pr_debug("Creating queue ioctl\n");
 263
 264        err = set_queue_properties_from_user(&q_properties, args);
 265        if (err)
 266                return err;
 267
 268        pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
 269        dev = kfd_device_by_id(args->gpu_id);
 270        if (!dev) {
 271                pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
 272                return -EINVAL;
 273        }
 274
 275        mutex_lock(&p->mutex);
 276
 277        pdd = kfd_bind_process_to_device(dev, p);
 278        if (IS_ERR(pdd)) {
 279                err = -ESRCH;
 280                goto err_bind_process;
 281        }
 282
 283        pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
 284                        p->pasid,
 285                        dev->id);
 286
 287        err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
 288        if (err != 0)
 289                goto err_create_queue;
 290
 291        args->queue_id = queue_id;
 292
 293
 294        /* Return gpu_id as doorbell offset for mmap usage */
 295        args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
 296        args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
 297        args->doorbell_offset <<= PAGE_SHIFT;
 298        if (KFD_IS_SOC15(dev->device_info->asic_family))
 299                /* On SOC15 ASICs, doorbell allocation must be
 300                 * per-device, and independent from the per-process
 301                 * queue_id. Return the doorbell offset within the
 302                 * doorbell aperture to user mode.
 303                 */
 304                args->doorbell_offset |= q_properties.doorbell_off;
 305
 306        mutex_unlock(&p->mutex);
 307
 308        pr_debug("Queue id %d was created successfully\n", args->queue_id);
 309
 310        pr_debug("Ring buffer address == 0x%016llX\n",
 311                        args->ring_base_address);
 312
 313        pr_debug("Read ptr address    == 0x%016llX\n",
 314                        args->read_pointer_address);
 315
 316        pr_debug("Write ptr address   == 0x%016llX\n",
 317                        args->write_pointer_address);
 318
 319        return 0;
 320
 321err_create_queue:
 322err_bind_process:
 323        mutex_unlock(&p->mutex);
 324        return err;
 325}
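/*
 * Example (not part of this driver): a rough user-space sketch of consuming
 * the doorbell_offset returned above. The offset already encodes
 * KFD_MMAP_TYPE_DOORBELL and the GPU ID (plus, on SOC15, the per-queue
 * doorbell offset), so it can be passed directly as the mmap offset on the
 * /dev/kfd file descriptor and decoded by kfd_mmap(). The mapping size is an
 * assumption here; user mode normally takes the per-process doorbell
 * aperture size from the device/topology properties.
 *
 *      #include <sys/mman.h>
 *
 *      static void *map_doorbells(int kfd_fd,
 *                                 const struct kfd_ioctl_create_queue_args *args,
 *                                 size_t doorbell_aperture_size)
 *      {
 *              return mmap(NULL, doorbell_aperture_size, PROT_READ | PROT_WRITE,
 *                          MAP_SHARED, kfd_fd, args->doorbell_offset);
 *      }
 */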
 326
 327static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
 328                                        void *data)
 329{
 330        int retval;
 331        struct kfd_ioctl_destroy_queue_args *args = data;
 332
 333        pr_debug("Destroying queue id %d for pasid %d\n",
 334                                args->queue_id,
 335                                p->pasid);
 336
 337        mutex_lock(&p->mutex);
 338
 339        retval = pqm_destroy_queue(&p->pqm, args->queue_id);
 340
 341        mutex_unlock(&p->mutex);
 342        return retval;
 343}
 344
 345static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
 346                                        void *data)
 347{
 348        int retval;
 349        struct kfd_ioctl_update_queue_args *args = data;
 350        struct queue_properties properties;
 351
 352        if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
  353                pr_err("Queue percentage must be between 0 and KFD_MAX_QUEUE_PERCENTAGE\n");
 354                return -EINVAL;
 355        }
 356
 357        if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
  358                pr_err("Queue priority must be between 0 and KFD_MAX_QUEUE_PRIORITY\n");
 359                return -EINVAL;
 360        }
 361
 362        if ((args->ring_base_address) &&
 363                (!access_ok((const void __user *) args->ring_base_address,
 364                        sizeof(uint64_t)))) {
 365                pr_err("Can't access ring base address\n");
 366                return -EFAULT;
 367        }
 368
 369        if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
 370                pr_err("Ring size must be a power of 2 or 0\n");
 371                return -EINVAL;
 372        }
 373
 374        properties.queue_address = args->ring_base_address;
 375        properties.queue_size = args->ring_size;
 376        properties.queue_percent = args->queue_percentage;
 377        properties.priority = args->queue_priority;
 378
 379        pr_debug("Updating queue id %d for pasid %d\n",
 380                        args->queue_id, p->pasid);
 381
 382        mutex_lock(&p->mutex);
 383
 384        retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);
 385
 386        mutex_unlock(&p->mutex);
 387
 388        return retval;
 389}
 390
 391static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
 392                                        void *data)
 393{
 394        int retval;
 395        const int max_num_cus = 1024;
 396        struct kfd_ioctl_set_cu_mask_args *args = data;
 397        struct queue_properties properties;
 398        uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
 399        size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);
 400
 401        if ((args->num_cu_mask % 32) != 0) {
 402                pr_debug("num_cu_mask 0x%x must be a multiple of 32",
 403                                args->num_cu_mask);
 404                return -EINVAL;
 405        }
 406
 407        properties.cu_mask_count = args->num_cu_mask;
 408        if (properties.cu_mask_count == 0) {
 409                pr_debug("CU mask cannot be 0");
 410                return -EINVAL;
 411        }
 412
  413        /* To prevent an unreasonably large CU mask size, set an arbitrary
  414         * limit of max_num_cus bits. Any CU mask bits beyond that limit are
  415         * dropped and only the first max_num_cus bits are used.
  416         */
 417        if (properties.cu_mask_count > max_num_cus) {
 418                pr_debug("CU mask cannot be greater than 1024 bits");
 419                properties.cu_mask_count = max_num_cus;
 420                cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
 421        }
 422
 423        properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
 424        if (!properties.cu_mask)
 425                return -ENOMEM;
 426
 427        retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
 428        if (retval) {
 429                pr_debug("Could not copy CU mask from userspace");
 430                kfree(properties.cu_mask);
 431                return -EFAULT;
 432        }
 433
 434        mutex_lock(&p->mutex);
 435
 436        retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);
 437
 438        mutex_unlock(&p->mutex);
 439
 440        if (retval)
 441                kfree(properties.cu_mask);
 442
 443        return retval;
 444}
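/*
 * Example (not part of this driver): a user-space sketch of building the CU
 * mask consumed by the ioctl above. num_cu_mask is a bit count, must be a
 * non-zero multiple of 32, and anything beyond max_num_cus (1024) bits is
 * truncated. AMDKFD_IOC_SET_CU_MASK is assumed from the uapi header.
 *
 *      #include <stdint.h>
 *      #include <sys/ioctl.h>
 *      #include <linux/kfd_ioctl.h>
 *
 *      // Enable the first num_cus compute units on the given queue.
 *      static int set_cu_mask(int kfd_fd, uint32_t queue_id, unsigned int num_cus)
 *      {
 *              uint32_t mask[32] = {0};        // 32 * 32 bits = 1024 bits max
 *              struct kfd_ioctl_set_cu_mask_args args = {0};
 *              unsigned int i;
 *
 *              for (i = 0; i < num_cus && i < 1024; i++)
 *                      mask[i / 32] |= 1u << (i % 32);
 *
 *              args.queue_id = queue_id;
 *              args.num_cu_mask = 1024;        // bit count, multiple of 32
 *              args.cu_mask_ptr = (uintptr_t)mask;
 *              return ioctl(kfd_fd, AMDKFD_IOC_SET_CU_MASK, &args);
 *      }
 */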
 445
 446static int kfd_ioctl_get_queue_wave_state(struct file *filep,
 447                                          struct kfd_process *p, void *data)
 448{
 449        struct kfd_ioctl_get_queue_wave_state_args *args = data;
 450        int r;
 451
 452        mutex_lock(&p->mutex);
 453
 454        r = pqm_get_wave_state(&p->pqm, args->queue_id,
 455                               (void __user *)args->ctl_stack_address,
 456                               &args->ctl_stack_used_size,
 457                               &args->save_area_used_size);
 458
 459        mutex_unlock(&p->mutex);
 460
 461        return r;
 462}
 463
 464static int kfd_ioctl_set_memory_policy(struct file *filep,
 465                                        struct kfd_process *p, void *data)
 466{
 467        struct kfd_ioctl_set_memory_policy_args *args = data;
 468        struct kfd_dev *dev;
 469        int err = 0;
 470        struct kfd_process_device *pdd;
 471        enum cache_policy default_policy, alternate_policy;
 472
 473        if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
 474            && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
 475                return -EINVAL;
 476        }
 477
 478        if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
 479            && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
 480                return -EINVAL;
 481        }
 482
 483        dev = kfd_device_by_id(args->gpu_id);
 484        if (!dev)
 485                return -EINVAL;
 486
 487        mutex_lock(&p->mutex);
 488
 489        pdd = kfd_bind_process_to_device(dev, p);
 490        if (IS_ERR(pdd)) {
 491                err = -ESRCH;
 492                goto out;
 493        }
 494
 495        default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
 496                         ? cache_policy_coherent : cache_policy_noncoherent;
 497
 498        alternate_policy =
 499                (args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
 500                   ? cache_policy_coherent : cache_policy_noncoherent;
 501
 502        if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
 503                                &pdd->qpd,
 504                                default_policy,
 505                                alternate_policy,
 506                                (void __user *)args->alternate_aperture_base,
 507                                args->alternate_aperture_size))
 508                err = -EINVAL;
 509
 510out:
 511        mutex_unlock(&p->mutex);
 512
 513        return err;
 514}
 515
 516static int kfd_ioctl_set_trap_handler(struct file *filep,
 517                                        struct kfd_process *p, void *data)
 518{
 519        struct kfd_ioctl_set_trap_handler_args *args = data;
 520        struct kfd_dev *dev;
 521        int err = 0;
 522        struct kfd_process_device *pdd;
 523
 524        dev = kfd_device_by_id(args->gpu_id);
 525        if (dev == NULL)
 526                return -EINVAL;
 527
 528        mutex_lock(&p->mutex);
 529
 530        pdd = kfd_bind_process_to_device(dev, p);
 531        if (IS_ERR(pdd)) {
 532                err = -ESRCH;
 533                goto out;
 534        }
 535
 536        if (dev->dqm->ops.set_trap_handler(dev->dqm,
 537                                        &pdd->qpd,
 538                                        args->tba_addr,
 539                                        args->tma_addr))
 540                err = -EINVAL;
 541
 542out:
 543        mutex_unlock(&p->mutex);
 544
 545        return err;
 546}
 547
 548static int kfd_ioctl_dbg_register(struct file *filep,
 549                                struct kfd_process *p, void *data)
 550{
 551        struct kfd_ioctl_dbg_register_args *args = data;
 552        struct kfd_dev *dev;
 553        struct kfd_dbgmgr *dbgmgr_ptr;
 554        struct kfd_process_device *pdd;
 555        bool create_ok;
 556        long status = 0;
 557
 558        dev = kfd_device_by_id(args->gpu_id);
 559        if (!dev)
 560                return -EINVAL;
 561
 562        if (dev->device_info->asic_family == CHIP_CARRIZO) {
 563                pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
 564                return -EINVAL;
 565        }
 566
 567        mutex_lock(&p->mutex);
 568        mutex_lock(kfd_get_dbgmgr_mutex());
 569
  570        /*
  571         * Make sure that we have a pdd, in case this is the first queue
  572         * created for this process.
  573         */
 574        pdd = kfd_bind_process_to_device(dev, p);
 575        if (IS_ERR(pdd)) {
 576                status = PTR_ERR(pdd);
 577                goto out;
 578        }
 579
 580        if (!dev->dbgmgr) {
 581                /* In case of a legal call, we have no dbgmgr yet */
 582                create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
 583                if (create_ok) {
 584                        status = kfd_dbgmgr_register(dbgmgr_ptr, p);
 585                        if (status != 0)
 586                                kfd_dbgmgr_destroy(dbgmgr_ptr);
 587                        else
 588                                dev->dbgmgr = dbgmgr_ptr;
 589                }
 590        } else {
 591                pr_debug("debugger already registered\n");
 592                status = -EINVAL;
 593        }
 594
 595out:
 596        mutex_unlock(kfd_get_dbgmgr_mutex());
 597        mutex_unlock(&p->mutex);
 598
 599        return status;
 600}
 601
 602static int kfd_ioctl_dbg_unregister(struct file *filep,
 603                                struct kfd_process *p, void *data)
 604{
 605        struct kfd_ioctl_dbg_unregister_args *args = data;
 606        struct kfd_dev *dev;
 607        long status;
 608
 609        dev = kfd_device_by_id(args->gpu_id);
 610        if (!dev || !dev->dbgmgr)
 611                return -EINVAL;
 612
 613        if (dev->device_info->asic_family == CHIP_CARRIZO) {
 614                pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
 615                return -EINVAL;
 616        }
 617
 618        mutex_lock(kfd_get_dbgmgr_mutex());
 619
 620        status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
 621        if (!status) {
 622                kfd_dbgmgr_destroy(dev->dbgmgr);
 623                dev->dbgmgr = NULL;
 624        }
 625
 626        mutex_unlock(kfd_get_dbgmgr_mutex());
 627
 628        return status;
 629}
 630
  631/*
  632 * Parse and generate a variable-size data structure for address watch.
  633 * The total buffer size and the number of watch points are limited in
  634 * order to prevent kernel abuse (this has no bearing on the much smaller
  635 * HW limitation, which is enforced by the dbgdev module).
  636 * Also note that the watch addresses themselves are not copied from user,
  637 * since they are set into the HW in user-mode values.
  638 *
  639 */
 640static int kfd_ioctl_dbg_address_watch(struct file *filep,
 641                                        struct kfd_process *p, void *data)
 642{
 643        struct kfd_ioctl_dbg_address_watch_args *args = data;
 644        struct kfd_dev *dev;
 645        struct dbg_address_watch_info aw_info;
 646        unsigned char *args_buff;
 647        long status;
 648        void __user *cmd_from_user;
 649        uint64_t watch_mask_value = 0;
 650        unsigned int args_idx = 0;
 651
 652        memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));
 653
 654        dev = kfd_device_by_id(args->gpu_id);
 655        if (!dev)
 656                return -EINVAL;
 657
 658        if (dev->device_info->asic_family == CHIP_CARRIZO) {
 659                pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
 660                return -EINVAL;
 661        }
 662
 663        cmd_from_user = (void __user *) args->content_ptr;
 664
 665        /* Validate arguments */
 666
 667        if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
 668                (args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
 669                (cmd_from_user == NULL))
 670                return -EINVAL;
 671
 672        /* this is the actual buffer to work with */
 673        args_buff = memdup_user(cmd_from_user,
 674                                args->buf_size_in_bytes - sizeof(*args));
 675        if (IS_ERR(args_buff))
 676                return PTR_ERR(args_buff);
 677
 678        aw_info.process = p;
 679
 680        aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
 681        args_idx += sizeof(aw_info.num_watch_points);
 682
 683        aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
 684        args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;
 685
  686        /*
  687         * set the watch address base pointer to point at the array base
  688         * within args_buff
  689         */
 690        aw_info.watch_address = (uint64_t *) &args_buff[args_idx];
 691
 692        /* skip over the addresses buffer */
 693        args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;
 694
 695        if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
 696                status = -EINVAL;
 697                goto out;
 698        }
 699
 700        watch_mask_value = (uint64_t) args_buff[args_idx];
 701
 702        if (watch_mask_value > 0) {
  703                /*
  704                 * There is an array of masks.
  705                 * Set the watch mask base pointer to point at the array
  706                 * base within args_buff.
  707                 */
 708                aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];
 709
 710                /* skip over the masks buffer */
 711                args_idx += sizeof(aw_info.watch_mask) *
 712                                aw_info.num_watch_points;
 713        } else {
 714                /* just the NULL mask, set to NULL and skip over it */
 715                aw_info.watch_mask = NULL;
 716                args_idx += sizeof(aw_info.watch_mask);
 717        }
 718
  719        if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
 720                status = -EINVAL;
 721                goto out;
 722        }
 723
 724        /* Currently HSA Event is not supported for DBG */
 725        aw_info.watch_event = NULL;
 726
 727        mutex_lock(kfd_get_dbgmgr_mutex());
 728
 729        status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);
 730
 731        mutex_unlock(kfd_get_dbgmgr_mutex());
 732
 733out:
 734        kfree(args_buff);
 735
 736        return status;
 737}
 738
 739/* Parse and generate fixed size data structure for wave control */
 740static int kfd_ioctl_dbg_wave_control(struct file *filep,
 741                                        struct kfd_process *p, void *data)
 742{
 743        struct kfd_ioctl_dbg_wave_control_args *args = data;
 744        struct kfd_dev *dev;
 745        struct dbg_wave_control_info wac_info;
 746        unsigned char *args_buff;
 747        uint32_t computed_buff_size;
 748        long status;
 749        void __user *cmd_from_user;
 750        unsigned int args_idx = 0;
 751
 752        memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));
 753
 754        /* we use compact form, independent of the packing attribute value */
 755        computed_buff_size = sizeof(*args) +
 756                                sizeof(wac_info.mode) +
 757                                sizeof(wac_info.operand) +
 758                                sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
 759                                sizeof(wac_info.dbgWave_msg.MemoryVA) +
 760                                sizeof(wac_info.trapId);
 761
 762        dev = kfd_device_by_id(args->gpu_id);
 763        if (!dev)
 764                return -EINVAL;
 765
 766        if (dev->device_info->asic_family == CHIP_CARRIZO) {
 767                pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
 768                return -EINVAL;
 769        }
 770
 771        /* input size must match the computed "compact" size */
 772        if (args->buf_size_in_bytes != computed_buff_size) {
 773                pr_debug("size mismatch, computed : actual %u : %u\n",
 774                                args->buf_size_in_bytes, computed_buff_size);
 775                return -EINVAL;
 776        }
 777
 778        cmd_from_user = (void __user *) args->content_ptr;
 779
 780        if (cmd_from_user == NULL)
 781                return -EINVAL;
 782
 783        /* copy the entire buffer from user */
 784
 785        args_buff = memdup_user(cmd_from_user,
 786                                args->buf_size_in_bytes - sizeof(*args));
 787        if (IS_ERR(args_buff))
 788                return PTR_ERR(args_buff);
 789
  790        /* move ptr to the start of the payload area */
 791        wac_info.process = p;
 792
 793        wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
 794        args_idx += sizeof(wac_info.operand);
 795
 796        wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
 797        args_idx += sizeof(wac_info.mode);
 798
 799        wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
 800        args_idx += sizeof(wac_info.trapId);
 801
 802        wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
 803                                        *((uint32_t *)(&args_buff[args_idx]));
 804        wac_info.dbgWave_msg.MemoryVA = NULL;
 805
 806        mutex_lock(kfd_get_dbgmgr_mutex());
 807
 808        pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
 809                        wac_info.process, wac_info.operand,
 810                        wac_info.mode, wac_info.trapId,
 811                        wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);
 812
 813        status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);
 814
 815        pr_debug("Returned status of dbg manager is %ld\n", status);
 816
 817        mutex_unlock(kfd_get_dbgmgr_mutex());
 818
 819        kfree(args_buff);
 820
 821        return status;
 822}
 823
 824static int kfd_ioctl_get_clock_counters(struct file *filep,
 825                                struct kfd_process *p, void *data)
 826{
 827        struct kfd_ioctl_get_clock_counters_args *args = data;
 828        struct kfd_dev *dev;
 829
 830        dev = kfd_device_by_id(args->gpu_id);
 831        if (dev)
 832                /* Reading GPU clock counter from KGD */
 833                args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
 834        else
 835                /* Node without GPU resource */
 836                args->gpu_clock_counter = 0;
 837
 838        /* No access to rdtsc. Using raw monotonic time */
 839        args->cpu_clock_counter = ktime_get_raw_ns();
 840        args->system_clock_counter = ktime_get_boot_ns();
 841
  842        /* Since the counter is in nanoseconds, we use 1 GHz frequency */
 843        args->system_clock_freq = 1000000000;
 844
 845        return 0;
 846}
 847
 848
 849static int kfd_ioctl_get_process_apertures(struct file *filp,
 850                                struct kfd_process *p, void *data)
 851{
 852        struct kfd_ioctl_get_process_apertures_args *args = data;
 853        struct kfd_process_device_apertures *pAperture;
 854        struct kfd_process_device *pdd;
 855
 856        dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
 857
 858        args->num_of_nodes = 0;
 859
 860        mutex_lock(&p->mutex);
 861
  862        /* if the process-device list isn't empty */
 863        if (kfd_has_process_device_data(p)) {
 864                /* Run over all pdd of the process */
 865                pdd = kfd_get_first_process_device_data(p);
 866                do {
 867                        pAperture =
 868                                &args->process_apertures[args->num_of_nodes];
 869                        pAperture->gpu_id = pdd->dev->id;
 870                        pAperture->lds_base = pdd->lds_base;
 871                        pAperture->lds_limit = pdd->lds_limit;
 872                        pAperture->gpuvm_base = pdd->gpuvm_base;
 873                        pAperture->gpuvm_limit = pdd->gpuvm_limit;
 874                        pAperture->scratch_base = pdd->scratch_base;
 875                        pAperture->scratch_limit = pdd->scratch_limit;
 876
 877                        dev_dbg(kfd_device,
 878                                "node id %u\n", args->num_of_nodes);
 879                        dev_dbg(kfd_device,
 880                                "gpu id %u\n", pdd->dev->id);
 881                        dev_dbg(kfd_device,
 882                                "lds_base %llX\n", pdd->lds_base);
 883                        dev_dbg(kfd_device,
 884                                "lds_limit %llX\n", pdd->lds_limit);
 885                        dev_dbg(kfd_device,
 886                                "gpuvm_base %llX\n", pdd->gpuvm_base);
 887                        dev_dbg(kfd_device,
 888                                "gpuvm_limit %llX\n", pdd->gpuvm_limit);
 889                        dev_dbg(kfd_device,
 890                                "scratch_base %llX\n", pdd->scratch_base);
 891                        dev_dbg(kfd_device,
 892                                "scratch_limit %llX\n", pdd->scratch_limit);
 893
 894                        args->num_of_nodes++;
 895
 896                        pdd = kfd_get_next_process_device_data(p, pdd);
 897                } while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
 898        }
 899
 900        mutex_unlock(&p->mutex);
 901
 902        return 0;
 903}
 904
 905static int kfd_ioctl_get_process_apertures_new(struct file *filp,
 906                                struct kfd_process *p, void *data)
 907{
 908        struct kfd_ioctl_get_process_apertures_new_args *args = data;
 909        struct kfd_process_device_apertures *pa;
 910        struct kfd_process_device *pdd;
 911        uint32_t nodes = 0;
 912        int ret;
 913
 914        dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);
 915
 916        if (args->num_of_nodes == 0) {
  917                /* Return number of nodes, so that user space can allocate
 918                 * sufficient memory
 919                 */
 920                mutex_lock(&p->mutex);
 921
 922                if (!kfd_has_process_device_data(p))
 923                        goto out_unlock;
 924
 925                /* Run over all pdd of the process */
 926                pdd = kfd_get_first_process_device_data(p);
 927                do {
 928                        args->num_of_nodes++;
 929                        pdd = kfd_get_next_process_device_data(p, pdd);
 930                } while (pdd);
 931
 932                goto out_unlock;
 933        }
 934
 935        /* Fill in process-aperture information for all available
 936         * nodes, but not more than args->num_of_nodes as that is
 937         * the amount of memory allocated by user
 938         */
 939        pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
 940                                args->num_of_nodes), GFP_KERNEL);
 941        if (!pa)
 942                return -ENOMEM;
 943
 944        mutex_lock(&p->mutex);
 945
 946        if (!kfd_has_process_device_data(p)) {
 947                args->num_of_nodes = 0;
 948                kfree(pa);
 949                goto out_unlock;
 950        }
 951
 952        /* Run over all pdd of the process */
 953        pdd = kfd_get_first_process_device_data(p);
 954        do {
 955                pa[nodes].gpu_id = pdd->dev->id;
 956                pa[nodes].lds_base = pdd->lds_base;
 957                pa[nodes].lds_limit = pdd->lds_limit;
 958                pa[nodes].gpuvm_base = pdd->gpuvm_base;
 959                pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
 960                pa[nodes].scratch_base = pdd->scratch_base;
 961                pa[nodes].scratch_limit = pdd->scratch_limit;
 962
 963                dev_dbg(kfd_device,
 964                        "gpu id %u\n", pdd->dev->id);
 965                dev_dbg(kfd_device,
 966                        "lds_base %llX\n", pdd->lds_base);
 967                dev_dbg(kfd_device,
 968                        "lds_limit %llX\n", pdd->lds_limit);
 969                dev_dbg(kfd_device,
 970                        "gpuvm_base %llX\n", pdd->gpuvm_base);
 971                dev_dbg(kfd_device,
 972                        "gpuvm_limit %llX\n", pdd->gpuvm_limit);
 973                dev_dbg(kfd_device,
 974                        "scratch_base %llX\n", pdd->scratch_base);
 975                dev_dbg(kfd_device,
 976                        "scratch_limit %llX\n", pdd->scratch_limit);
 977                nodes++;
 978
 979                pdd = kfd_get_next_process_device_data(p, pdd);
 980        } while (pdd && (nodes < args->num_of_nodes));
 981        mutex_unlock(&p->mutex);
 982
 983        args->num_of_nodes = nodes;
 984        ret = copy_to_user(
 985                        (void __user *)args->kfd_process_device_apertures_ptr,
 986                        pa,
 987                        (nodes * sizeof(struct kfd_process_device_apertures)));
 988        kfree(pa);
 989        return ret ? -EFAULT : 0;
 990
 991out_unlock:
 992        mutex_unlock(&p->mutex);
 993        return 0;
 994}
 995
 996static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
 997                                        void *data)
 998{
 999        struct kfd_ioctl_create_event_args *args = data;
1000        int err;
1001
1002        /* For dGPUs the event page is allocated in user mode. The
1003         * handle is passed to KFD with the first call to this IOCTL
1004         * through the event_page_offset field.
1005         */
1006        if (args->event_page_offset) {
1007                struct kfd_dev *kfd;
1008                struct kfd_process_device *pdd;
1009                void *mem, *kern_addr;
1010                uint64_t size;
1011
1012                if (p->signal_page) {
1013                        pr_err("Event page is already set\n");
1014                        return -EINVAL;
1015                }
1016
1017                kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
1018                if (!kfd) {
1019                        pr_err("Getting device by id failed in %s\n", __func__);
1020                        return -EINVAL;
1021                }
1022
1023                mutex_lock(&p->mutex);
1024                pdd = kfd_bind_process_to_device(kfd, p);
1025                if (IS_ERR(pdd)) {
1026                        err = PTR_ERR(pdd);
1027                        goto out_unlock;
1028                }
1029
1030                mem = kfd_process_device_translate_handle(pdd,
1031                                GET_IDR_HANDLE(args->event_page_offset));
1032                if (!mem) {
1033                        pr_err("Can't find BO, offset is 0x%llx\n",
1034                               args->event_page_offset);
1035                        err = -EINVAL;
1036                        goto out_unlock;
1037                }
1038                mutex_unlock(&p->mutex);
1039
1040                err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
1041                                                mem, &kern_addr, &size);
1042                if (err) {
1043                        pr_err("Failed to map event page to kernel\n");
1044                        return err;
1045                }
1046
1047                err = kfd_event_page_set(p, kern_addr, size);
1048                if (err) {
1049                        pr_err("Failed to set event page\n");
1050                        return err;
1051                }
1052        }
1053
1054        err = kfd_event_create(filp, p, args->event_type,
1055                                args->auto_reset != 0, args->node_id,
1056                                &args->event_id, &args->event_trigger_data,
1057                                &args->event_page_offset,
1058                                &args->event_slot_index);
1059
1060        return err;
1061
1062out_unlock:
1063        mutex_unlock(&p->mutex);
1064        return err;
1065}
1066
1067static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
1068                                        void *data)
1069{
1070        struct kfd_ioctl_destroy_event_args *args = data;
1071
1072        return kfd_event_destroy(p, args->event_id);
1073}
1074
1075static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
1076                                void *data)
1077{
1078        struct kfd_ioctl_set_event_args *args = data;
1079
1080        return kfd_set_event(p, args->event_id);
1081}
1082
1083static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
1084                                void *data)
1085{
1086        struct kfd_ioctl_reset_event_args *args = data;
1087
1088        return kfd_reset_event(p, args->event_id);
1089}
1090
1091static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
1092                                void *data)
1093{
1094        struct kfd_ioctl_wait_events_args *args = data;
1095        int err;
1096
1097        err = kfd_wait_on_events(p, args->num_events,
1098                        (void __user *)args->events_ptr,
1099                        (args->wait_for_all != 0),
1100                        args->timeout, &args->wait_result);
1101
1102        return err;
1103}
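/*
 * Example (not part of this driver): a user-space sketch tying together the
 * event ioctls above, with event_page_offset left at zero so that KFD manages
 * the signal page (the non-dGPU path). The AMDKFD_IOC_* requests, the
 * KFD_IOC_EVENT_SIGNAL type and KFD_IOC_WAIT_RESULT_COMPLETE are assumed from
 * the uapi header; the timeout is in milliseconds.
 *
 *      #include <stdint.h>
 *      #include <string.h>
 *      #include <sys/ioctl.h>
 *      #include <linux/kfd_ioctl.h>
 *
 *      static int create_and_wait(int kfd_fd)
 *      {
 *              struct kfd_ioctl_create_event_args create = {0};
 *              struct kfd_ioctl_wait_events_args wait = {0};
 *              struct kfd_event_data data;
 *
 *              create.event_type = KFD_IOC_EVENT_SIGNAL;
 *              create.auto_reset = 1;
 *              if (ioctl(kfd_fd, AMDKFD_IOC_CREATE_EVENT, &create))
 *                      return -1;
 *
 *              memset(&data, 0, sizeof(data));
 *              data.event_id = create.event_id;
 *              wait.events_ptr = (uintptr_t)&data;
 *              wait.num_events = 1;
 *              wait.wait_for_all = 1;
 *              wait.timeout = 1000;    // milliseconds
 *              if (ioctl(kfd_fd, AMDKFD_IOC_WAIT_EVENTS, &wait))
 *                      return -1;
 *              return wait.wait_result == KFD_IOC_WAIT_RESULT_COMPLETE ? 0 : -1;
 *      }
 */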
1104static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
1105                                        struct kfd_process *p, void *data)
1106{
1107        struct kfd_ioctl_set_scratch_backing_va_args *args = data;
1108        struct kfd_process_device *pdd;
1109        struct kfd_dev *dev;
1110        long err;
1111
1112        dev = kfd_device_by_id(args->gpu_id);
1113        if (!dev)
1114                return -EINVAL;
1115
1116        mutex_lock(&p->mutex);
1117
1118        pdd = kfd_bind_process_to_device(dev, p);
1119        if (IS_ERR(pdd)) {
1120                err = PTR_ERR(pdd);
1121                goto bind_process_to_device_fail;
1122        }
1123
1124        pdd->qpd.sh_hidden_private_base = args->va_addr;
1125
1126        mutex_unlock(&p->mutex);
1127
1128        if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
1129            pdd->qpd.vmid != 0)
1130                dev->kfd2kgd->set_scratch_backing_va(
1131                        dev->kgd, args->va_addr, pdd->qpd.vmid);
1132
1133        return 0;
1134
1135bind_process_to_device_fail:
1136        mutex_unlock(&p->mutex);
1137        return err;
1138}
1139
1140static int kfd_ioctl_get_tile_config(struct file *filep,
1141                struct kfd_process *p, void *data)
1142{
1143        struct kfd_ioctl_get_tile_config_args *args = data;
1144        struct kfd_dev *dev;
1145        struct tile_config config;
1146        int err = 0;
1147
1148        dev = kfd_device_by_id(args->gpu_id);
1149        if (!dev)
1150                return -EINVAL;
1151
1152        dev->kfd2kgd->get_tile_config(dev->kgd, &config);
1153
1154        args->gb_addr_config = config.gb_addr_config;
1155        args->num_banks = config.num_banks;
1156        args->num_ranks = config.num_ranks;
1157
1158        if (args->num_tile_configs > config.num_tile_configs)
1159                args->num_tile_configs = config.num_tile_configs;
1160        err = copy_to_user((void __user *)args->tile_config_ptr,
1161                        config.tile_config_ptr,
1162                        args->num_tile_configs * sizeof(uint32_t));
1163        if (err) {
1164                args->num_tile_configs = 0;
1165                return -EFAULT;
1166        }
1167
1168        if (args->num_macro_tile_configs > config.num_macro_tile_configs)
1169                args->num_macro_tile_configs =
1170                                config.num_macro_tile_configs;
1171        err = copy_to_user((void __user *)args->macro_tile_config_ptr,
1172                        config.macro_tile_config_ptr,
1173                        args->num_macro_tile_configs * sizeof(uint32_t));
1174        if (err) {
1175                args->num_macro_tile_configs = 0;
1176                return -EFAULT;
1177        }
1178
1179        return 0;
1180}
1181
1182static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
1183                                void *data)
1184{
1185        struct kfd_ioctl_acquire_vm_args *args = data;
1186        struct kfd_process_device *pdd;
1187        struct kfd_dev *dev;
1188        struct file *drm_file;
1189        int ret;
1190
1191        dev = kfd_device_by_id(args->gpu_id);
1192        if (!dev)
1193                return -EINVAL;
1194
1195        drm_file = fget(args->drm_fd);
1196        if (!drm_file)
1197                return -EINVAL;
1198
1199        mutex_lock(&p->mutex);
1200
1201        pdd = kfd_get_process_device_data(dev, p);
1202        if (!pdd) {
1203                ret = -EINVAL;
1204                goto err_unlock;
1205        }
1206
1207        if (pdd->drm_file) {
1208                ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
1209                goto err_unlock;
1210        }
1211
1212        ret = kfd_process_device_init_vm(pdd, drm_file);
1213        if (ret)
1214                goto err_unlock;
1215        /* On success, the PDD keeps the drm_file reference */
1216        mutex_unlock(&p->mutex);
1217
1218        return 0;
1219
1220err_unlock:
1221        mutex_unlock(&p->mutex);
1222        fput(drm_file);
1223        return ret;
1224}
1225
1226bool kfd_dev_is_large_bar(struct kfd_dev *dev)
1227{
1228        struct kfd_local_mem_info mem_info;
1229
1230        if (debug_largebar) {
1231                pr_debug("Simulate large-bar allocation on non large-bar machine\n");
1232                return true;
1233        }
1234
1235        if (dev->device_info->needs_iommu_device)
1236                return false;
1237
1238        amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
1239        if (mem_info.local_mem_size_private == 0 &&
1240                        mem_info.local_mem_size_public > 0)
1241                return true;
1242        return false;
1243}
1244
1245static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
1246                                        struct kfd_process *p, void *data)
1247{
1248        struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
1249        struct kfd_process_device *pdd;
1250        void *mem;
1251        struct kfd_dev *dev;
1252        int idr_handle;
1253        long err;
1254        uint64_t offset = args->mmap_offset;
1255        uint32_t flags = args->flags;
1256
1257        if (args->size == 0)
1258                return -EINVAL;
1259
1260        dev = kfd_device_by_id(args->gpu_id);
1261        if (!dev)
1262                return -EINVAL;
1263
1264        if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
1265                (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
1266                !kfd_dev_is_large_bar(dev)) {
 1267                pr_err("Allocating host-visible VRAM on a small-BAR system is not allowed\n");
1268                return -EINVAL;
1269        }
1270
1271        if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
1272                if (args->size != kfd_doorbell_process_slice(dev))
1273                        return -EINVAL;
1274                offset = kfd_get_process_doorbells(dev, p);
1275        }
1276
1277        mutex_lock(&p->mutex);
1278
1279        pdd = kfd_bind_process_to_device(dev, p);
1280        if (IS_ERR(pdd)) {
1281                err = PTR_ERR(pdd);
1282                goto err_unlock;
1283        }
1284
1285        err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
1286                dev->kgd, args->va_addr, args->size,
1287                pdd->vm, (struct kgd_mem **) &mem, &offset,
1288                flags);
1289
1290        if (err)
1291                goto err_unlock;
1292
1293        idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1294        if (idr_handle < 0) {
1295                err = -EFAULT;
1296                goto err_free;
1297        }
1298
1299        mutex_unlock(&p->mutex);
1300
1301        args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1302        args->mmap_offset = offset;
1303
1304        return 0;
1305
1306err_free:
1307        amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
1308err_unlock:
1309        mutex_unlock(&p->mutex);
1310        return err;
1311}
1312
1313static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
1314                                        struct kfd_process *p, void *data)
1315{
1316        struct kfd_ioctl_free_memory_of_gpu_args *args = data;
1317        struct kfd_process_device *pdd;
1318        void *mem;
1319        struct kfd_dev *dev;
1320        int ret;
1321
1322        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1323        if (!dev)
1324                return -EINVAL;
1325
1326        mutex_lock(&p->mutex);
1327
1328        pdd = kfd_get_process_device_data(dev, p);
1329        if (!pdd) {
1330                pr_err("Process device data doesn't exist\n");
1331                ret = -EINVAL;
1332                goto err_unlock;
1333        }
1334
1335        mem = kfd_process_device_translate_handle(
1336                pdd, GET_IDR_HANDLE(args->handle));
1337        if (!mem) {
1338                ret = -EINVAL;
1339                goto err_unlock;
1340        }
1341
1342        ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
1343                                                (struct kgd_mem *)mem);
1344
1345        /* If freeing the buffer failed, leave the handle in place for
1346         * clean-up during process tear-down.
1347         */
1348        if (!ret)
1349                kfd_process_device_remove_obj_handle(
1350                        pdd, GET_IDR_HANDLE(args->handle));
1351
1352err_unlock:
1353        mutex_unlock(&p->mutex);
1354        return ret;
1355}
1356
1357static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
1358                                        struct kfd_process *p, void *data)
1359{
1360        struct kfd_ioctl_map_memory_to_gpu_args *args = data;
1361        struct kfd_process_device *pdd, *peer_pdd;
1362        void *mem;
1363        struct kfd_dev *dev, *peer;
1364        long err = 0;
1365        int i;
1366        uint32_t *devices_arr = NULL;
1367
1368        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1369        if (!dev)
1370                return -EINVAL;
1371
1372        if (!args->n_devices) {
1373                pr_debug("Device IDs array empty\n");
1374                return -EINVAL;
1375        }
1376        if (args->n_success > args->n_devices) {
1377                pr_debug("n_success exceeds n_devices\n");
1378                return -EINVAL;
1379        }
1380
1381        devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1382                                    GFP_KERNEL);
1383        if (!devices_arr)
1384                return -ENOMEM;
1385
1386        err = copy_from_user(devices_arr,
1387                             (void __user *)args->device_ids_array_ptr,
1388                             args->n_devices * sizeof(*devices_arr));
1389        if (err != 0) {
1390                err = -EFAULT;
1391                goto copy_from_user_failed;
1392        }
1393
1394        mutex_lock(&p->mutex);
1395
1396        pdd = kfd_bind_process_to_device(dev, p);
1397        if (IS_ERR(pdd)) {
1398                err = PTR_ERR(pdd);
1399                goto bind_process_to_device_failed;
1400        }
1401
1402        mem = kfd_process_device_translate_handle(pdd,
1403                                                GET_IDR_HANDLE(args->handle));
1404        if (!mem) {
1405                err = -ENOMEM;
1406                goto get_mem_obj_from_handle_failed;
1407        }
1408
1409        for (i = args->n_success; i < args->n_devices; i++) {
1410                peer = kfd_device_by_id(devices_arr[i]);
1411                if (!peer) {
1412                        pr_debug("Getting device by id failed for 0x%x\n",
1413                                 devices_arr[i]);
1414                        err = -EINVAL;
1415                        goto get_mem_obj_from_handle_failed;
1416                }
1417
1418                peer_pdd = kfd_bind_process_to_device(peer, p);
1419                if (IS_ERR(peer_pdd)) {
1420                        err = PTR_ERR(peer_pdd);
1421                        goto get_mem_obj_from_handle_failed;
1422                }
1423                err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
1424                        peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1425                if (err) {
1426                        pr_err("Failed to map to gpu %d/%d\n",
1427                               i, args->n_devices);
1428                        goto map_memory_to_gpu_failed;
1429                }
1430                args->n_success = i+1;
1431        }
1432
1433        mutex_unlock(&p->mutex);
1434
1435        err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
1436        if (err) {
1437                pr_debug("Sync memory failed, wait interrupted by user signal\n");
1438                goto sync_memory_failed;
1439        }
1440
1441        /* Flush TLBs after waiting for the page table updates to complete */
1442        for (i = 0; i < args->n_devices; i++) {
1443                peer = kfd_device_by_id(devices_arr[i]);
1444                if (WARN_ON_ONCE(!peer))
1445                        continue;
1446                peer_pdd = kfd_get_process_device_data(peer, p);
1447                if (WARN_ON_ONCE(!peer_pdd))
1448                        continue;
1449                kfd_flush_tlb(peer_pdd);
1450        }
1451
1452        kfree(devices_arr);
1453
1454        return err;
1455
1456bind_process_to_device_failed:
1457get_mem_obj_from_handle_failed:
1458map_memory_to_gpu_failed:
1459        mutex_unlock(&p->mutex);
1460copy_from_user_failed:
1461sync_memory_failed:
1462        kfree(devices_arr);
1463
1464        return err;
1465}
1466
1467static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
1468                                        struct kfd_process *p, void *data)
1469{
1470        struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
1471        struct kfd_process_device *pdd, *peer_pdd;
1472        void *mem;
1473        struct kfd_dev *dev, *peer;
1474        long err = 0;
1475        uint32_t *devices_arr = NULL, i;
1476
1477        dev = kfd_device_by_id(GET_GPU_ID(args->handle));
1478        if (!dev)
1479                return -EINVAL;
1480
1481        if (!args->n_devices) {
1482                pr_debug("Device IDs array empty\n");
1483                return -EINVAL;
1484        }
1485        if (args->n_success > args->n_devices) {
1486                pr_debug("n_success exceeds n_devices\n");
1487                return -EINVAL;
1488        }
1489
1490        devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
1491                                    GFP_KERNEL);
1492        if (!devices_arr)
1493                return -ENOMEM;
1494
1495        err = copy_from_user(devices_arr,
1496                             (void __user *)args->device_ids_array_ptr,
1497                             args->n_devices * sizeof(*devices_arr));
1498        if (err != 0) {
1499                err = -EFAULT;
1500                goto copy_from_user_failed;
1501        }
1502
1503        mutex_lock(&p->mutex);
1504
1505        pdd = kfd_get_process_device_data(dev, p);
1506        if (!pdd) {
1507                err = -EINVAL;
1508                goto bind_process_to_device_failed;
1509        }
1510
1511        mem = kfd_process_device_translate_handle(pdd,
1512                                                GET_IDR_HANDLE(args->handle));
1513        if (!mem) {
1514                err = -ENOMEM;
1515                goto get_mem_obj_from_handle_failed;
1516        }
1517
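            /*
             * args->n_success is both input and output: the loop resumes at
             * the first device not yet handled and advances the count after
             * each successful unmap, so userspace can see how far a partially
             * failed request got.
             */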
1518        for (i = args->n_success; i < args->n_devices; i++) {
1519                peer = kfd_device_by_id(devices_arr[i]);
1520                if (!peer) {
1521                        err = -EINVAL;
1522                        goto get_mem_obj_from_handle_failed;
1523                }
1524
1525                peer_pdd = kfd_get_process_device_data(peer, p);
1526                if (!peer_pdd) {
1527                        err = -ENODEV;
1528                        goto get_mem_obj_from_handle_failed;
1529                }
1530                err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
1531                        peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
1532                if (err) {
1533                        pr_err("Failed to unmap from gpu %d/%d\n",
1534                               i, args->n_devices);
1535                        goto unmap_memory_from_gpu_failed;
1536                }
1537                args->n_success = i + 1;
1538        }
1539        kfree(devices_arr);
1540
1541        mutex_unlock(&p->mutex);
1542
1543        return 0;
1544
1545bind_process_to_device_failed:
1546get_mem_obj_from_handle_failed:
1547unmap_memory_from_gpu_failed:
1548        mutex_unlock(&p->mutex);
1549copy_from_user_failed:
1550        kfree(devices_arr);
1551        return err;
1552}
1553
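    /*
     * Report the size, originating GPU and optional metadata of a DMA-buf
     * file descriptor. The query itself goes through the first KFD device
     * found by topology enumeration; the gpu_id returned to userspace is
     * derived by reverse-looking up the kgd pointer that the query reports
     * for the exporter.
     */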
1554static int kfd_ioctl_get_dmabuf_info(struct file *filep,
1555                struct kfd_process *p, void *data)
1556{
1557        struct kfd_ioctl_get_dmabuf_info_args *args = data;
1558        struct kfd_dev *dev = NULL;
1559        struct kgd_dev *dma_buf_kgd;
1560        void *metadata_buffer = NULL;
1561        uint32_t flags;
1562        unsigned int i;
1563        int r;
1564
1565        /* Find a KFD GPU device that supports the get_dmabuf_info query */
1566        for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
1567                if (dev)
1568                        break;
1569        if (!dev)
1570                return -EINVAL;
1571
1572        if (args->metadata_ptr) {
1573                metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
1574                if (!metadata_buffer)
1575                        return -ENOMEM;
1576        }
1577
1578        /* Get dmabuf info from KGD */
1579        r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
1580                                          &dma_buf_kgd, &args->size,
1581                                          metadata_buffer, args->metadata_size,
1582                                          &args->metadata_size, &flags);
1583        if (r)
1584                goto exit;
1585
1586        /* Reverse-lookup gpu_id from kgd pointer */
1587        dev = kfd_device_by_kgd(dma_buf_kgd);
1588        if (!dev) {
1589                r = -EINVAL;
1590                goto exit;
1591        }
1592        args->gpu_id = dev->id;
1593        args->flags = flags;
1594
1595        /* Copy metadata buffer to user mode */
1596        if (metadata_buffer) {
1597                r = copy_to_user((void __user *)args->metadata_ptr,
1598                                 metadata_buffer, args->metadata_size);
1599                if (r != 0)
1600                        r = -EFAULT;
1601        }
1602
1603exit:
1604        kfree(metadata_buffer);
1605
1606        return r;
1607}
1608
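    /*
     * Import a DMA-buf into the process' GPU VM on the requested device and
     * hand back a buffer handle in the same gpu_id/IDR format used by
     * AMDKFD_IOC_ALLOC_MEMORY_OF_GPU, so the imported buffer can then be
     * mapped with AMDKFD_IOC_MAP_MEMORY_TO_GPU.
     */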
1609static int kfd_ioctl_import_dmabuf(struct file *filep,
1610                                   struct kfd_process *p, void *data)
1611{
1612        struct kfd_ioctl_import_dmabuf_args *args = data;
1613        struct kfd_process_device *pdd;
1614        struct dma_buf *dmabuf;
1615        struct kfd_dev *dev;
1616        int idr_handle;
1617        uint64_t size;
1618        void *mem;
1619        int r;
1620
1621        dev = kfd_device_by_id(args->gpu_id);
1622        if (!dev)
1623                return -EINVAL;
1624
1625        dmabuf = dma_buf_get(args->dmabuf_fd);
1626        if (IS_ERR(dmabuf))
1627                return PTR_ERR(dmabuf);
1628
1629        mutex_lock(&p->mutex);
1630
1631        pdd = kfd_bind_process_to_device(dev, p);
1632        if (IS_ERR(pdd)) {
1633                r = PTR_ERR(pdd);
1634                goto err_unlock;
1635        }
1636
1637        r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
1638                                              args->va_addr, pdd->vm,
1639                                              (struct kgd_mem **)&mem, &size,
1640                                              NULL);
1641        if (r)
1642                goto err_unlock;
1643
1644        idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
1645        if (idr_handle < 0) {
1646                r = -EFAULT;
1647                goto err_free;
1648        }
1649
1650        mutex_unlock(&p->mutex);
1651
1652        args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
1653
1654        return 0;
1655
1656err_free:
1657        amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
1658err_unlock:
1659        mutex_unlock(&p->mutex);
1660        return r;
1661}
1662
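    /*
     * AMDKFD_IOCTL_DEF() fills one entry of the ioctl descriptor table
     * below, indexed by ioctl number (_IOC_NR) via designated initializers.
     * kfd_ioctl() takes the handler and the canonical command word (with
     * its direction and size bits) from the matching entry rather than
     * trusting the value supplied by userspace.
     */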
1663#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
1664        [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
1665                            .cmd_drv = 0, .name = #ioctl}
1666
1667/** Ioctl table */
1668static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
1669        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
1670                        kfd_ioctl_get_version, 0),
1671
1672        AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
1673                        kfd_ioctl_create_queue, 0),
1674
1675        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
1676                        kfd_ioctl_destroy_queue, 0),
1677
1678        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
1679                        kfd_ioctl_set_memory_policy, 0),
1680
1681        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
1682                        kfd_ioctl_get_clock_counters, 0),
1683
1684        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
1685                        kfd_ioctl_get_process_apertures, 0),
1686
1687        AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
1688                        kfd_ioctl_update_queue, 0),
1689
1690        AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
1691                        kfd_ioctl_create_event, 0),
1692
1693        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
1694                        kfd_ioctl_destroy_event, 0),
1695
1696        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
1697                        kfd_ioctl_set_event, 0),
1698
1699        AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
1700                        kfd_ioctl_reset_event, 0),
1701
1702        AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
1703                        kfd_ioctl_wait_events, 0),
1704
1705        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
1706                        kfd_ioctl_dbg_register, 0),
1707
1708        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
1709                        kfd_ioctl_dbg_unregister, 0),
1710
1711        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
1712                        kfd_ioctl_dbg_address_watch, 0),
1713
1714        AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
1715                        kfd_ioctl_dbg_wave_control, 0),
1716
1717        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
1718                        kfd_ioctl_set_scratch_backing_va, 0),
1719
1720        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
1721                        kfd_ioctl_get_tile_config, 0),
1722
1723        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
1724                        kfd_ioctl_set_trap_handler, 0),
1725
1726        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
1727                        kfd_ioctl_get_process_apertures_new, 0),
1728
1729        AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
1730                        kfd_ioctl_acquire_vm, 0),
1731
1732        AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
1733                        kfd_ioctl_alloc_memory_of_gpu, 0),
1734
1735        AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
1736                        kfd_ioctl_free_memory_of_gpu, 0),
1737
1738        AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
1739                        kfd_ioctl_map_memory_to_gpu, 0),
1740
1741        AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
1742                        kfd_ioctl_unmap_memory_from_gpu, 0),
1743
1744        AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
1745                        kfd_ioctl_set_cu_mask, 0),
1746
1747        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
1748                        kfd_ioctl_get_queue_wave_state, 0),
1749
1750        AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
1751                                kfd_ioctl_get_dmabuf_info, 0),
1752
1753        AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
1754                                kfd_ioctl_import_dmabuf, 0),
1755
1756};
1757
1758#define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls)
1759
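    /*
     * Common ioctl dispatcher: validate the ioctl number, look up its
     * descriptor, copy the argument struct in and/or out according to the
     * command's direction bits, and zero-extend the kernel copy when the
     * current kernel definition of the struct is larger than what userspace
     * passed in.
     *
     * Illustrative userspace sketch (not part of the driver), assuming the
     * uapi header and a standard libc:
     *
     *	#include <stdio.h>
     *	#include <fcntl.h>
     *	#include <sys/ioctl.h>
     *	#include <linux/kfd_ioctl.h>
     *
     *	int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);
     *	struct kfd_ioctl_get_version_args ver = {0};
     *
     *	if (fd >= 0 && ioctl(fd, AMDKFD_IOC_GET_VERSION, &ver) == 0)
     *		printf("KFD interface %u.%u\n", ver.major_version,
     *		       ver.minor_version);
     */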
1760static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
1761{
1762        struct kfd_process *process;
1763        amdkfd_ioctl_t *func;
1764        const struct amdkfd_ioctl_desc *ioctl = NULL;
1765        unsigned int nr = _IOC_NR(cmd);
1766        char stack_kdata[128];
1767        char *kdata = NULL;
1768        unsigned int usize, asize;
1769        int retcode = -EINVAL;
1770
1771        if (nr >= AMDKFD_CORE_IOCTL_COUNT)
1772                goto err_i1;
1773
1774        if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
1775                u32 amdkfd_size;
1776
1777                ioctl = &amdkfd_ioctls[nr];
1778
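                    /*
                     * usize is the struct size encoded in the userspace
                     * command word; asize is the larger of that and the
                     * kernel's current definition, so any newer kernel-side
                     * fields get zero-filled further down.
                     */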
1779                amdkfd_size = _IOC_SIZE(ioctl->cmd);
1780                usize = asize = _IOC_SIZE(cmd);
1781                if (amdkfd_size > asize)
1782                        asize = amdkfd_size;
1783
1784                cmd = ioctl->cmd;
1785        } else
1786                goto err_i1;
1787
1788        dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);
1789
1790        process = kfd_get_process(current);
1791        if (IS_ERR(process)) {
1792                dev_dbg(kfd_device, "no process\n");
1793                goto err_i1;
1794        }
1795
1796        /* Do not trust userspace, use our own definition */
1797        func = ioctl->func;
1798
1799        if (unlikely(!func)) {
1800                dev_dbg(kfd_device, "no function\n");
1801                retcode = -EINVAL;
1802                goto err_i1;
1803        }
1804
1805        if (cmd & (IOC_IN | IOC_OUT)) {
1806                if (asize <= sizeof(stack_kdata)) {
1807                        kdata = stack_kdata;
1808                } else {
1809                        kdata = kmalloc(asize, GFP_KERNEL);
1810                        if (!kdata) {
1811                                retcode = -ENOMEM;
1812                                goto err_i1;
1813                        }
1814                }
1815                if (asize > usize)
1816                        memset(kdata + usize, 0, asize - usize);
1817        }
1818
1819        if (cmd & IOC_IN) {
1820                if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
1821                        retcode = -EFAULT;
1822                        goto err_i1;
1823                }
1824        } else if (cmd & IOC_OUT) {
1825                memset(kdata, 0, usize);
1826        }
1827
1828        retcode = func(filep, process, kdata);
1829
1830        if (cmd & IOC_OUT)
1831                if (copy_to_user((void __user *)arg, kdata, usize) != 0)
1832                        retcode = -EFAULT;
1833
1834err_i1:
1835        if (!ioctl)
1836                dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
1837                          task_pid_nr(current), cmd, nr);
1838
1839        if (kdata != stack_kdata)
1840                kfree(kdata);
1841
1842        if (retcode)
1843                dev_dbg(kfd_device, "ret = %d\n", retcode);
1844
1845        return retcode;
1846}
1847
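    /*
     * mmap handler for /dev/kfd. The high bits of the page offset encode
     * the mapping type (KFD_MMAP_TYPE_*) and, for per-device mappings, the
     * gpu_id; the remaining bits are restored as the real offset before
     * dispatching to the doorbell, event-page or reserved-memory mmap
     * helpers.
     */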
1848static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
1849{
1850        struct kfd_process *process;
1851        struct kfd_dev *dev = NULL;
1852        unsigned long vm_pgoff;
1853        unsigned int gpu_id;
1854
1855        process = kfd_get_process(current);
1856        if (IS_ERR(process))
1857                return PTR_ERR(process);
1858
1859        vm_pgoff = vma->vm_pgoff;
1860        vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
1861        gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
1862        if (gpu_id)
1863                dev = kfd_device_by_id(gpu_id);
1864
1865        switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
1866        case KFD_MMAP_TYPE_DOORBELL:
1867                if (!dev)
1868                        return -ENODEV;
1869                return kfd_doorbell_mmap(dev, process, vma);
1870
1871        case KFD_MMAP_TYPE_EVENTS:
1872                return kfd_event_mmap(process, vma);
1873
1874        case KFD_MMAP_TYPE_RESERVED_MEM:
1875                if (!dev)
1876                        return -ENODEV;
1877                return kfd_reserved_mem_mmap(dev, process, vma);
1878        }
1879
1880        return -EFAULT;
1881}
1882